diff --git a/.gitignore b/.gitignore
index 8ddfb11..e016965 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,5 +11,7 @@ scripts/ads-no-subtype
*.egg-info
*__pycache__
-# Ignore test cache
+# Ignore caches
.pytest_cache
+.ruff_cache
+.coverage
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..d51c541
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,7 @@
+repos:
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.15.5
+ hooks:
+ - id: ruff-format
+ - id: ruff
+ args: [--fix]
diff --git a/README.md b/README.md
index 6ab6567..a24ba76 100644
--- a/README.md
+++ b/README.md
@@ -26,34 +26,20 @@ and position-based specifications.
---
## Recent Updates
-### 0.6.9
-
-- Fixed bugs in component parsers (class comparison, assignment operator, set literal)
-- Fixed `return` in `finally` block in requests searcher
-- Added captcha detection to feature extractor
-- Added captcha handling and jittered delay to demo searches
-- Dropped pandas from core dependencies
-- Cleaned up legacy typing imports
-- Removed poetry.toml
-
-### 0.6.8
-
-- Migrated from Poetry to uv for dependency management
-- Added Python 3.12-3.14 test matrix in GitHub Actions
-- Added `flights` classifier and `standard-4` layout
-- Added local service ad parser
-- Extracted bottom ads before main column
-- Fixed `return` in `finally` block warning in selenium searcher
-
-### 0.6.7
-
-- Added `get_text_by_selectors()` to `webutils` -- centralizes multi-selector fallback pattern across 7 component parsers
-- Added `perspectives`, `recent_posts`, and `latest_from` component classifiers
-- Added `sub_type` to perspectives parser from header text
-- Added CI test workflow on push to dev branch
-- Added compressed test fixtures with `condense_fixtures.py` script
-- Updated dependency lower bounds for security patches (protobuf, orjson)
-- Updated GitHub Actions to checkout v6 and setup-python v6
+### 0.7.0 (dev)
+
+- **Breaking:** `details` field is now always `dict | None` with a self-describing `type` key (e.g. `{"type": "menu", "items": [...]}`)
+- **Breaking:** `parse_serp()` now always returns a dict with `results` and `features` keys; the `extract_features` parameter has been removed
+- Standardized all models on Pydantic BaseModel (removed dataclasses)
+- Added `ResponseOutput` and `ParsedSERP` typed models
+- Removed `DetailsItem`, `DetailsList` classes
+- Normalized `local_results` sub_type for location-specific headers
+- Replaced `os` with `pathlib.Path` throughout
+- Consolidated `webutils.py` into `utils.py`
+- Added ruff formatting, linting, and pre-commit hooks
+- Added test coverage reporting (69%)
+- Added unit tests for utils, locations, models, and feature extractor
+- Replaced pandas with polars in demo scripts
---
## Getting Started
@@ -132,7 +118,7 @@ Example search and parse pipeline (via requests):
import WebSearcher as ws
se = ws.SearchEngine() # 1. Initialize collector
se.search('immigration news') # 2. Conduct a search
-se.parse_results() # 3. Parse search results
+se.parse_serp() # 3. Parse search results
se.save_serp(append_to='serps.json') # 4. Save HTML and metadata
se.save_results(append_to='results.json') # 5. Save parsed results
@@ -164,14 +150,14 @@ se.search('immigration news')
#### 3. Parse Search Results
-The example below is primarily for parsing search results as you collect HTML.
+The example below is primarily for parsing search results as you collect HTML.
See `ws.parse_serp(html)` for parsing existing HTML data.
```python
-se.parse_results()
+se.parse_serp()
# Show first result
-se.results[0]
+se.parsed.results[0]
{'section': 'main',
'cmpt_rank': 0,
'sub_rank': 0,
@@ -288,10 +274,34 @@ To release a new version:
---
## Update Log
+`0.7.0`
+- Standardize data models on Pydantic, typed details field, remove DetailsItem/DetailsList
+
+`0.6.9`
+- Fixed bugs in component parsers (class comparison, assignment operator, set literal)
+- Fixed `return` in `finally` block in requests searcher
+- Added captcha detection to feature extractor
+- Added captcha handling and jittered delay to demo searches
+- Dropped pandas from core dependencies
+- Cleaned up legacy typing imports
+- Removed poetry.toml
+
+`0.6.8`
+- Migrated from Poetry to uv for dependency management
+- Added Python 3.12-3.14 test matrix in GitHub Actions
+- Added `flights` classifier and `standard-4` layout
+- Added local service ad parser
+- Extracted bottom ads before main column
+- Fixed `return` in `finally` block warning in selenium searcher
+
`0.6.7`
-- Add `get_text_by_selectors()` utility, CI test workflow, compressed test fixtures
-- Add `perspectives`, `recent_posts`, `latest_from` classifiers and `sub_type` for perspectives
-- Update dependency bounds for security patches, GitHub Actions to v6
+- Added `get_text_by_selectors()` to `webutils` -- centralizes multi-selector fallback pattern across 7 component parsers
+- Added `perspectives`, `recent_posts`, and `latest_from` component classifiers
+- Added `sub_type` to perspectives parser from header text
+- Added CI test workflow on push to dev branch
+- Added compressed test fixtures with `condense_fixtures.py` script
+- Updated dependency lower bounds for security patches (protobuf, orjson)
+- Updated GitHub Actions to checkout v6 and setup-python v6
`0.6.6`
- Update packages with dependabot alerts (brotli, urllib3)
diff --git a/WebSearcher/__init__.py b/WebSearcher/__init__.py
index ade6ec8..f72b00e 100644
--- a/WebSearcher/__init__.py
+++ b/WebSearcher/__init__.py
@@ -1,8 +1,22 @@
-__version__ = "0.6.9"
-from .searchers import SearchEngine
-from .parsers import parse_serp
-from .feature_extractor import FeatureExtractor
+__version__ = "0.7.0"
+
+from .classifiers import ClassifyFooter, ClassifyMain
from .extractors import Extractor
+from .feature_extractor import FeatureExtractor
from .locations import download_locations
-from .classifiers import ClassifyMain, ClassifyFooter
-from .webutils import load_html, make_soup, load_soup
+from .parsers import parse_serp
+from .searchers import SearchEngine
+from .utils import load_html, load_soup, make_soup
+
+__all__ = [
+ "ClassifyFooter",
+ "ClassifyMain",
+ "Extractor",
+ "FeatureExtractor",
+ "download_locations",
+ "parse_serp",
+ "SearchEngine",
+ "load_html",
+ "load_soup",
+ "make_soup",
+]
diff --git a/WebSearcher/classifiers/__init__.py b/WebSearcher/classifiers/__init__.py
index 44c05bf..e215d84 100644
--- a/WebSearcher/classifiers/__init__.py
+++ b/WebSearcher/classifiers/__init__.py
@@ -1,4 +1,11 @@
-from .header_text import ClassifyHeaderText
+from .footer import ClassifyFooter
from .header_components import ClassifyHeaderComponent
+from .header_text import ClassifyHeaderText
from .main import ClassifyMain
-from .footer import ClassifyFooter
\ No newline at end of file
+
+__all__ = [
+ "ClassifyFooter",
+ "ClassifyHeaderComponent",
+ "ClassifyHeaderText",
+ "ClassifyMain",
+]
diff --git a/WebSearcher/classifiers/footer.py b/WebSearcher/classifiers/footer.py
index 58e162d..4f764d1 100644
--- a/WebSearcher/classifiers/footer.py
+++ b/WebSearcher/classifiers/footer.py
@@ -1,14 +1,15 @@
import bs4
-from .. import webutils
+
+from .. import utils
from .main import ClassifyMain
-class ClassifyFooter:
+class ClassifyFooter:
@staticmethod
def classify(cmpt: bs4.element.Tag) -> str:
layout_conditions = [
- ('id' in cmpt.attrs and cmpt.attrs['id'] in {'bres', 'brs'}),
- ('class' in cmpt.attrs and cmpt.attrs['class'] == ['MjjYud']),
+ ("id" in cmpt.attrs and cmpt.attrs["id"] in {"bres", "brs"}),
+ ("class" in cmpt.attrs and cmpt.attrs["class"] == ["MjjYud"]),
]
# Ordered list of classifiers to try
@@ -26,13 +27,14 @@ def classify(cmpt: bs4.element.Tag) -> str:
# Default unknown, exit on first successful classification
cmpt_type = "unknown"
for classifier in classifier_list:
- if cmpt_type != "unknown": break
+ if cmpt_type != "unknown":
+ break
cmpt_type = classifier(cmpt)
# Fall back to main classifier
- if cmpt_type == 'unknown':
+ if cmpt_type == "unknown":
cmpt_type = ClassifyMain.classify(cmpt)
-
+
return cmpt_type
@staticmethod
@@ -40,23 +42,25 @@ def discover_more(cmpt):
conditions = [
cmpt.find("g-scrolling-carousel"),
]
- return 'discover_more' if all(conditions) else "unknown"
+ return "discover_more" if all(conditions) else "unknown"
@staticmethod
def omitted_notice(cmpt):
conditions = [
- cmpt.find("p", {"id":"ofr"}),
- (webutils.get_text(cmpt, "h2") == "Notices about Filtered Results"),
+ cmpt.find("p", {"id": "ofr"}),
+ (utils.get_text(cmpt, "h2") == "Notices about Filtered Results"),
]
return "omitted_notice" if any(conditions) else "unknown"
@staticmethod
def searches_related(cmpt):
- known_labels = {'Related',
- 'Related searches',
- 'People also search for',
- 'Related to this search',
- 'Searches related to'}
- h3 = cmpt.find('h3')
+ known_labels = {
+ "Related",
+ "Related searches",
+ "People also search for",
+ "Related to this search",
+ "Searches related to",
+ }
+ h3 = cmpt.find("h3")
h3_matches = [h3.text.strip().startswith(text) for text in known_labels] if h3 else []
- return 'searches_related' if any(h3_matches) else 'unknown'
+ return "searches_related" if any(h3_matches) else "unknown"
diff --git a/WebSearcher/classifiers/header_components.py b/WebSearcher/classifiers/header_components.py
index c88218e..af640dc 100644
--- a/WebSearcher/classifiers/header_components.py
+++ b/WebSearcher/classifiers/header_components.py
@@ -1,6 +1,7 @@
-from .. import webutils
import bs4
+from .. import utils
+
class ClassifyHeaderComponent:
"""Classify a component from the header section based on its bs4.element.Tag"""
@@ -8,8 +9,8 @@ class ClassifyHeaderComponent:
@staticmethod
def classify(cmpt: bs4.element.Tag) -> str:
"""Classify the component type based on header text"""
-
+
cmpt_type = "unknown"
- if webutils.check_dict_value(cmpt.attrs, "id", ["taw", "topstuff"]):
+ if utils.check_dict_value(cmpt.attrs, "id", ["taw", "topstuff"]):
cmpt_type = "notice"
return cmpt_type
diff --git a/WebSearcher/classifiers/header_text.py b/WebSearcher/classifiers/header_text.py
index 5d67f29..d0ba3ce 100644
--- a/WebSearcher/classifiers/header_text.py
+++ b/WebSearcher/classifiers/header_text.py
@@ -1,7 +1,8 @@
import bs4
+
class ClassifyHeaderText:
- """Classify components based on header text (e.g.
title )"""
+ """Classify components based on header text (e.g. title )"""
@staticmethod
def classify(cmpt: bs4.element.Tag, levels: list[int] = [2, 3]) -> str:
@@ -12,7 +13,7 @@ def classify(cmpt: bs4.element.Tag, levels: list[int] = [2, 3]) -> str:
return "unknown"
@staticmethod
- def classify_header_lvl2(cmpt: bs4.element.Tag) -> str:
+ def classify_header_lvl2(cmpt: bs4.element.Tag) -> str:
return ClassifyHeaderText._classify_header(cmpt, level=2)
@staticmethod
@@ -26,8 +27,8 @@ def _classify_header(cmpt: bs4.element.Tag, level: int) -> str:
# Collect list of potential header divs
header_list = []
- header_list.extend(cmpt.find_all(f"h{level}", {"role":"heading"}))
- header_list.extend(cmpt.find_all(f"h{level}", {"class":["O3JH7", "q8U8x", "mfMhoc"]}))
+ header_list.extend(cmpt.find_all(f"h{level}", {"role": "heading"}))
+ header_list.extend(cmpt.find_all(f"h{level}", {"class": ["O3JH7", "q8U8x", "mfMhoc"]}))
header_list.extend(cmpt.find_all(attrs={"aria-level": f"{level}", "role": "heading"}))
# Check header text for known title matches
@@ -44,85 +45,102 @@ def _classify_header(cmpt: bs4.element.Tag, level: int) -> str:
@staticmethod
def _get_header_level_mapping(level) -> dict:
"""Return mapping of header level to header text"""
- options = {2: ClassifyHeaderText.TYPE_TO_H2_MAPPING,
- 3: ClassifyHeaderText.TYPE_TO_H3_MAPPING}
+ options = {
+ 2: ClassifyHeaderText.TYPE_TO_H2_MAPPING,
+ 3: ClassifyHeaderText.TYPE_TO_H3_MAPPING,
+ }
return options.get(level, {})
# WS type -> header level 2 text (e.g., title )
TYPE_TO_H2_MAPPING = {
- "directions": ["Directions",
- "Ubicaciones"],
- "discussions_and_forums": ["Discussions and forums",
- "Questions & answers"],
- "general": ["Complementary Results",
- "Web Result with Site Links",
- "Web results",
- "Resultados de la Web",
- "AI-powered overview",
- "Visión general creada por IA"],
- "images": ["Images",
- "Imágenes"],
- "jobs": ["Jobs",
- "Empleos"],
- "knowledge": ["Things to know",
- "Cosas que debes saber",
- "Calculator Result",
- "Featured snippet from the web", "Fragmento destacado",
- "Finance Results", "Resumen de Mercado",
- "From sources across the web",
- "Knowledge Result",
- "Resultado de traducci\u00f3n",
- "Sports Results",
- "Table", "Posiciones",
- "Stat Leaders", "Líderes de estadísticas",
- "Teams", "Equipos",
- "Players", "Jugadores",
- "Translation Result",
- "Unit Converter",
- "Weather Result", "Clima",
- "Artworks", "Obras de arte",
- "Songs", "Canciones",
- "Albums", "Álbumes",
- "About", "Información",
- "Profiles", "Perfiles"],
+ "directions": ["Directions", "Ubicaciones"],
+ "discussions_and_forums": ["Discussions and forums", "Questions & answers"],
+ "general": [
+ "Complementary Results",
+ "Web Result with Site Links",
+ "Web results",
+ "Resultados de la Web",
+ "AI-powered overview",
+ "Visión general creada por IA",
+ ],
+ "images": ["Images", "Imágenes"],
+ "jobs": ["Jobs", "Empleos"],
+ "knowledge": [
+ "Things to know",
+ "Cosas que debes saber",
+ "Calculator Result",
+ "Featured snippet from the web",
+ "Fragmento destacado",
+ "Finance Results",
+ "Resumen de Mercado",
+ "From sources across the web",
+ "Knowledge Result",
+ "Resultado de traducci\u00f3n",
+ "Sports Results",
+ "Table",
+ "Posiciones",
+ "Stat Leaders",
+ "Líderes de estadísticas",
+ "Teams",
+ "Equipos",
+ "Players",
+ "Jugadores",
+ "Translation Result",
+ "Unit Converter",
+ "Weather Result",
+ "Clima",
+ "Artworks",
+ "Obras de arte",
+ "Songs",
+ "Canciones",
+ "Albums",
+ "Álbumes",
+ "About",
+ "Información",
+ "Profiles",
+ "Perfiles",
+ ],
"latest_from": ["Latest from"],
- "local_news": ["Local news", "Noticias Locales",
- "Latest in local"],
+ "local_news": ["Local news", "Noticias Locales", "Latest in local"],
"local_results": [
"Local Results",
"Locations",
- "Places",
+ "Places",
"Sitios",
"Businesses",
"locations",
],
- "map_results": ["Map Results",
- "Choice Hotels",
- "Hoteles",
- "Hotel"],
+ "map_results": ["Map Results", "Choice Hotels", "Hoteles", "Hotel"],
"omitted_notice": ["Notices about Filtered Results"],
"people_also_ask": ["People also ask", "Más preguntas"],
- "perspectives": ["Perspectives & opinions",
- "Perspectives",
- "What people are saying"],
- "searches_related": ["Additional searches",
- "More searches", "Ver más",
- "Other searches",
- "People also search for", "También se buscó",
- "Related",
- "Related searches",
- "Related to this search",
- "Searches related to"],
- "top_stories": ["Top stories",
- "Noticias Destacadas",
- "Noticias Principales",
- "News",
- "Noticias",
- "Market news"],
- "recent_posts": ["Recent posts",
- "Latest posts from"],
+ "perspectives": [
+ "Perspectives & opinions",
+ "Perspectives",
+ "What people are saying",
+ ],
+ "searches_related": [
+ "Additional searches",
+ "More searches",
+ "Ver más",
+ "Other searches",
+ "People also search for",
+ "También se buscó",
+ "Related",
+ "Related searches",
+ "Related to this search",
+ "Searches related to",
+ ],
+ "top_stories": [
+ "Top stories",
+ "Noticias Destacadas",
+ "Noticias Principales",
+ "News",
+ "Noticias",
+ "Market news",
+ ],
+ "recent_posts": ["Recent posts", "Latest posts from"],
"twitter": ["Twitter Results"],
- "videos": ["Videos"]
+ "videos": ["Videos"],
}
# WS type -> header level 2 text (e.g., title )
@@ -137,10 +155,9 @@ def _get_header_level_mapping(level) -> dict:
"top_stories": ["Top stories", "Noticias destacadas", "Noticias Principales"],
"videos": ["Videos"],
"view_more_news": ["View more news", "Más noticias", "Ver más"],
- "view_more_videos": ["View more videos", "Más videos", "Ver más"]
+ "view_more_videos": ["View more videos", "Más videos", "Ver más"],
}
# Invert from {label: [text, ...]} to [{text: label}, ...]
TYPE_TO_H2_MAPPING = {vv: k for k, v in TYPE_TO_H2_MAPPING.items() for vv in v}
TYPE_TO_H3_MAPPING = {vv: k for k, v in TYPE_TO_H3_MAPPING.items() for vv in v}
-
diff --git a/WebSearcher/classifiers/main.py b/WebSearcher/classifiers/main.py
index 9ecd121..2b77910 100644
--- a/WebSearcher/classifiers/main.py
+++ b/WebSearcher/classifiers/main.py
@@ -1,60 +1,62 @@
import bs4
-from .. import logger
-log = logger.Logger().start(__name__)
+from .. import logger, utils
from .header_text import ClassifyHeaderText
-from .. import webutils
+
+log = logger.Logger().start(__name__)
+
class ClassifyMain:
- """Classify a component from the main section based on its bs4.element.Tag """
+ """Classify a component from the main section based on its bs4.element.Tag"""
@staticmethod
def classify(cmpt: bs4.element.Tag) -> str:
# Ordered list of classifiers to try
component_classifiers = [
- ClassifyMain.locations, # Check locations (hotels, etc.) before top_stories
- ClassifyMain.top_stories, # Check top stories
- ClassifyMain.discussions_and_forums, # Check discussions and forums
- ClassifyHeaderText.classify, # Check levels 2 & 3 header text
- ClassifyMain.news_quotes, # Check news quotes
- ClassifyMain.img_cards, # Check image cards
- ClassifyMain.images, # Check images
- ClassifyMain.ai_overview, # Check AI overview
- ClassifyMain.knowledge_panel, # Check knowledge panel
- ClassifyMain.knowledge_block, # Check knowledge components
- ClassifyMain.banner, # Check for banners
- ClassifyMain.finance_panel, # Check finance panel (classify as knowledge)
- ClassifyMain.map_result, # Check for map results
+ ClassifyMain.locations, # Check locations (hotels, etc.) before top_stories
+ ClassifyMain.top_stories, # Check top stories
+ ClassifyMain.discussions_and_forums, # Check discussions and forums
+ ClassifyHeaderText.classify, # Check levels 2 & 3 header text
+ ClassifyMain.news_quotes, # Check news quotes
+ ClassifyMain.img_cards, # Check image cards
+ ClassifyMain.images, # Check images
+ ClassifyMain.ai_overview, # Check AI overview
+ ClassifyMain.knowledge_panel, # Check knowledge panel
+ ClassifyMain.knowledge_block, # Check knowledge components
+ ClassifyMain.banner, # Check for banners
+ ClassifyMain.finance_panel, # Check finance panel (classify as knowledge)
+ ClassifyMain.map_result, # Check for map results
ClassifyMain.general_questions, # Check hybrid general questions
- ClassifyMain.short_videos, # Check short videos carousel
- ClassifyMain.twitter, # Check twitter cards and results
- ClassifyMain.flights, # Check flights widgets
- ClassifyMain.general, # Check general components
- ClassifyMain.people_also_ask, # Check people also ask
- ClassifyMain.knowledge_box, # Check flights, maps, hotels, events, jobs
- ClassifyMain.local_results, # Check for local results
- ClassifyMain.available_on, # Check for available on
+ ClassifyMain.short_videos, # Check short videos carousel
+ ClassifyMain.twitter, # Check twitter cards and results
+ ClassifyMain.flights, # Check flights widgets
+ ClassifyMain.general, # Check general components
+ ClassifyMain.people_also_ask, # Check people also ask
+ ClassifyMain.knowledge_box, # Check flights, maps, hotels, events, jobs
+ ClassifyMain.local_results, # Check for local results
+ ClassifyMain.available_on, # Check for available on
]
# Default unknown, exit on first successful classification
cmpt_type = "unknown"
for classifier in component_classifiers:
- if cmpt_type != "unknown": break
+ if cmpt_type != "unknown":
+ break
cmpt_type = classifier(cmpt)
-
+
return cmpt_type
@staticmethod
def discussions_and_forums(cmpt: bs4.element.Tag) -> str:
heading = cmpt.find("div", {"class": "IFnjPb", "role": "heading"})
if heading and heading.get_text(strip=True).startswith("Discussions and forums"):
- return 'discussions_and_forums'
+ return "discussions_and_forums"
return "unknown"
@staticmethod
def available_on(cmpt: bs4.element.Tag) -> str:
- conditions = [("/Available on" in webutils.get_text(cmpt))]
+ conditions = [("/Available on" in utils.get_text(cmpt))]
return "available_on" if any(conditions) else "unknown"
@staticmethod
@@ -63,19 +65,19 @@ def banner(cmpt: bs4.element.Tag) -> str:
"ULSxyf" in cmpt.attrs.get("class", []),
cmpt.find("div", {"class": "uzjuFc"}),
]
- return 'banner' if all(conditions) else "unknown"
+ return "banner" if all(conditions) else "unknown"
@staticmethod
def finance_panel(cmpt: bs4.element.Tag) -> str:
condition = cmpt.find("div", {"id": "knowledge-finance-wholepage__entity-summary"})
- return 'knowledge' if condition else "unknown"
+ return "knowledge" if condition else "unknown"
@staticmethod
def flights(cmpt: bs4.element.Tag) -> str:
"""Classify Google Flights widgets (prices, status)"""
- heading = cmpt.find(attrs={'role': 'heading'})
- if heading and heading.get_text(strip=True).startswith('Flight'):
- return 'flights'
+ heading = cmpt.find(attrs={"role": "heading"})
+ if heading and heading.get_text(strip=True).startswith("Flight"):
+ return "flights"
return "unknown"
@staticmethod
@@ -85,26 +87,27 @@ def general(cmpt: bs4.element.Tag) -> str:
if "class" in cmpt.attrs:
conditions_dict = {
"format-01": cmpt.attrs["class"] == ["g"],
- "format-02": ( ("g" in cmpt.attrs["class"]) &
- any(s in ["Ww4FFb"] for s in cmpt.attrs["class"]) ),
+ "format-02": (
+ ("g" in cmpt.attrs["class"]) & any(s in ["Ww4FFb"] for s in cmpt.attrs["class"])
+ ),
"format-03": any(s in ["hlcw0c", "MjjYud", "PmEWq"] for s in cmpt.attrs["class"]),
- "format-04": cmpt.find('div', {'class': ['g', 'Ww4FFb']}),
+ "format-04": cmpt.find("div", {"class": ["g", "Ww4FFb"]}),
}
- else:
+ else:
conditions_dict = {
- 'format-05': all(cmpt.find("div", {"class": c}) for c in ["g", "d4rhi"]),
+ "format-05": all(cmpt.find("div", {"class": c}) for c in ["g", "d4rhi"]),
}
layout_matches = [k for k, v in conditions_dict.items() if v]
# log.debug(f"general layout: {layout_matches}")
-
- return 'general' if any(layout_matches) else "unknown"
+
+ return "general" if any(layout_matches) else "unknown"
@staticmethod
def general_questions(cmpt: bs4.element.Tag) -> str:
hybrid = cmpt.find("div", {"class": "ifM9O"})
g_accordian = cmpt.find("g-accordion")
- return 'general_questions' if hybrid and g_accordian else "unknown"
+ return "general_questions" if hybrid and g_accordian else "unknown"
@staticmethod
def img_cards(cmpt: bs4.element.Tag) -> str:
@@ -114,17 +117,17 @@ def img_cards(cmpt: bs4.element.Tag) -> str:
any(s in ["hlcw0c", "MjjYud"] for s in cmpt.attrs["class"]),
cmpt.find("block-component"),
]
- return 'img_cards' if all(conditions) else "unknown"
+ return "img_cards" if all(conditions) else "unknown"
else:
return "unknown"
@staticmethod
def images(cmpt: bs4.element.Tag) -> str:
conditions = [
- cmpt.find("div", {"id": "imagebox_bigimages"}),
- cmpt.find("div", {"id":"iur"})
+ cmpt.find("div", {"id": "imagebox_bigimages"}),
+ cmpt.find("div", {"id": "iur"}),
]
- return 'images' if any(conditions) else "unknown"
+ return "images" if any(conditions) else "unknown"
@staticmethod
def ai_overview(cmpt: bs4.element.Tag) -> str:
@@ -133,33 +136,32 @@ def ai_overview(cmpt: bs4.element.Tag) -> str:
cmpt.find("div", {"class": "Fzsovc"}),
cmpt.find("h2") and cmpt.find("h2").get_text(strip=True) == "AI Overview",
]
- return 'knowledge' if any(conditions) else "unknown"
+ return "knowledge" if any(conditions) else "unknown"
@staticmethod
def knowledge_block(cmpt: bs4.element.Tag) -> str:
"""Classify knowledge block components"""
conditions = [
- webutils.check_dict_value(cmpt.attrs, "class", ["ULSxyf"]),
- cmpt.find('block-component'),
+ utils.check_dict_value(cmpt.attrs, "class", ["ULSxyf"]),
+ cmpt.find("block-component"),
]
- return 'knowledge' if all(conditions) else "unknown"
+ return "knowledge" if all(conditions) else "unknown"
@staticmethod
def knowledge_box(cmpt: bs4.element.Tag) -> str:
"""Classify knowledge component types"""
attrs = cmpt.attrs
condition = {}
- condition['flights'] = (
- (webutils.check_dict_value(attrs, "jscontroller", "Z2bSc")) |
- bool(cmpt.find("div", {"jscontroller": "Z2bSc"}))
+ condition["flights"] = (utils.check_dict_value(attrs, "jscontroller", "Z2bSc")) | bool(
+ cmpt.find("div", {"jscontroller": "Z2bSc"})
)
- condition['maps'] = webutils.check_dict_value(attrs, "data-hveid", "CAMQAA")
- condition['locations'] = cmpt.find("div", {"class": "zd2Jbb"})
- condition['events'] = cmpt.find("g-card", {"class": "URhAHe"})
- condition['jobs'] = cmpt.find("g-card", {"class": "cvoI5e"})
+ condition["maps"] = utils.check_dict_value(attrs, "data-hveid", "CAMQAA")
+ condition["locations"] = cmpt.find("div", {"class": "zd2Jbb"})
+ condition["events"] = cmpt.find("g-card", {"class": "URhAHe"})
+ condition["jobs"] = cmpt.find("g-card", {"class": "cvoI5e"})
text_list = list(cmpt.stripped_strings)
if text_list:
- condition['covid_alert'] = (text_list[0] == "COVID-19 alert")
+ condition["covid_alert"] = text_list[0] == "COVID-19 alert"
for condition_type, conditions in condition.items():
if conditions:
return condition_type
@@ -169,84 +171,86 @@ def knowledge_box(cmpt: bs4.element.Tag) -> str:
def knowledge_panel(cmpt: bs4.element.Tag) -> str:
conditions = [
cmpt.find("h1", {"class": "VW3apb"}),
- cmpt.find("div", {"class": ["knowledge-panel", "knavi", "kp-blk", "kp-wholepage-osrp"]}),
+ cmpt.find(
+ "div",
+ {"class": ["knowledge-panel", "knavi", "kp-blk", "kp-wholepage-osrp"]},
+ ),
cmpt.find("div", {"aria-label": "Featured results", "role": "complementary"}),
cmpt.find("div", {"jscontroller": "qTdDb"}),
- webutils.check_dict_value(cmpt.attrs, "jscontroller", "qTdDb"),
- cmpt.find('div', {'class':'obcontainer'})
+ utils.check_dict_value(cmpt.attrs, "jscontroller", "qTdDb"),
+ cmpt.find("div", {"class": "obcontainer"}),
]
- return 'knowledge' if any(conditions) else "unknown"
+ return "knowledge" if any(conditions) else "unknown"
@staticmethod
def local_results(cmpt: bs4.element.Tag) -> str:
conditions = [
cmpt.find("div", {"class": "Qq3Lb"}), # Places
- cmpt.find("div", {"class": "VkpGBb"}) # Local Results
+ cmpt.find("div", {"class": "VkpGBb"}), # Local Results
]
- return 'local_results' if any(conditions) else "unknown"
+ return "local_results" if any(conditions) else "unknown"
@staticmethod
def map_result(cmpt: bs4.element.Tag) -> str:
condition = cmpt.find("div", {"class": "lu_map_section"})
- return 'map_results' if condition else "unknown"
+ return "map_results" if condition else "unknown"
@staticmethod
def people_also_ask(cmpt: bs4.element.Tag) -> str:
"""Secondary check for people also ask, see classify_header for primary"""
class_list = ["g", "kno-kp", "mnr-c", "g-blk"]
- conditions = webutils.check_dict_value(cmpt.attrs, "class", class_list)
- return 'people_also_ask' if conditions else "unknown"
+ conditions = utils.check_dict_value(cmpt.attrs, "class", class_list)
+ return "people_also_ask" if conditions else "unknown"
@staticmethod
def short_videos(cmpt: bs4.element.Tag) -> str:
"""Classify short videos carousel"""
- heading = cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'})
- if heading and heading.get_text(strip=True) == 'Short videos':
- return 'short_videos'
+ heading = cmpt.find("span", {"role": "heading", "class": "IFnjPb"})
+ if heading and heading.get_text(strip=True) == "Short videos":
+ return "short_videos"
return "unknown"
@staticmethod
def locations(cmpt: bs4.element.Tag) -> str:
"""Classify locations components (hotels, etc.)"""
- heading = cmpt.find(attrs={'role': 'heading'})
+ heading = cmpt.find(attrs={"role": "heading"})
if heading:
text = heading.get_text(strip=True)
- if text.startswith('Hotels') or text.startswith('More Hotels'):
- return 'locations'
+ if text.startswith("Hotels") or text.startswith("More Hotels"):
+ return "locations"
return "unknown"
@staticmethod
def top_stories(cmpt: bs4.element.Tag) -> str:
"""Classify top stories components"""
conditions = [
- cmpt.find("g-scrolling-carousel"),
- cmpt.find("div", {"id": "tvcap"})
+ cmpt.find("g-scrolling-carousel"),
+ cmpt.find("div", {"id": "tvcap"}),
]
- return 'top_stories' if all(conditions) else "unknown"
+ return "top_stories" if all(conditions) else "unknown"
@staticmethod
def news_quotes(cmpt: bs4.element.Tag) -> str:
"""Classify top stories components"""
header_div = cmpt.find("g-tray-header", role="heading")
- condition = webutils.get_text(header_div, strip=True) == "News quotes"
- return 'news_quotes' if condition else "unknown"
+ condition = utils.get_text(header_div, strip=True) == "News quotes"
+ return "news_quotes" if condition else "unknown"
@staticmethod
def twitter(cmpt: bs4.element.Tag) -> str:
- cmpt_type = 'twitter' if cmpt.find('div', {'class': 'eejeod'}) else "unknown"
+ cmpt_type = "twitter" if cmpt.find("div", {"class": "eejeod"}) else "unknown"
cmpt_type = ClassifyMain.twitter_type(cmpt, cmpt_type)
return cmpt_type
@staticmethod
def twitter_type(cmpt: bs4.element.Tag, cmpt_type="unknown") -> str:
- """ Distinguish twitter types ('twitter_cards', 'twitter_result')"""
- cmpt_prev = cmpt.find_previous()
+ """Distinguish twitter types ('twitter_cards', 'twitter_result')"""
conditions = [
- (cmpt_type == 'twitter'), # Check type (header text)
- webutils.get_text(cmpt, strip=True) == "Twitter Results" # Check text
+ (cmpt_type == "twitter"), # Check type (header text)
+ utils.get_text(cmpt, strip=True) == "Twitter Results", # Check text
]
if any(conditions):
# Differentiate twitter cards (carousel) and twitter result (single)
carousel = cmpt.find("g-scrolling-carousel")
cmpt_type = "twitter_cards" if carousel else "twitter_result"
- return cmpt_type
\ No newline at end of file
+ return cmpt_type
diff --git a/WebSearcher/component_parsers/__init__.py b/WebSearcher/component_parsers/__init__.py
index 9da8a4c..324490c 100644
--- a/WebSearcher/component_parsers/__init__.py
+++ b/WebSearcher/component_parsers/__init__.py
@@ -1,105 +1,104 @@
-
-from .notices import parse_notices
-from .top_image_carousel import parse_top_image_carousel
-
from .ads import parse_ads
from .available_on import parse_available_on
from .banner import parse_banner
from .discussions_and_forums import parse_discussions_and_forums
+from .footer import Footer
from .general import parse_general_results
from .general_questions import parse_general_questions
from .images import parse_images
from .knowledge import parse_knowledge_panel
-
-from .top_stories import parse_top_stories
+from .knowledge_rhs import parse_knowledge_rhs
from .latest_from import parse_latest_from
from .local_news import parse_local_news
-from .perspectives import parse_perspectives
-from .recent_posts import parse_recent_posts
-
from .local_results import parse_local_results
from .locations import parse_locations
from .map_results import parse_map_results
from .news_quotes import parse_news_quotes
+from .notices import parse_notices
from .people_also_ask import parse_people_also_ask
+from .perspectives import parse_perspectives
+from .recent_posts import parse_recent_posts
from .scholarly_articles import parse_scholarly_articles
from .searches_related import parse_searches_related
-from .short_videos import parse_short_videos
from .shopping_ads import parse_shopping_ads
+from .short_videos import parse_short_videos
+from .top_image_carousel import parse_top_image_carousel
+from .top_stories import parse_top_stories
from .twitter_cards import parse_twitter_cards
from .twitter_result import parse_twitter_result
from .videos import parse_videos
from .view_more_news import parse_view_more_news
-from .footer import Footer
-from .knowledge_rhs import parse_knowledge_rhs
-
# Header parsers
header_parsers = [
("notice", parse_notices, "Notices"),
- ('top_image_carousel', parse_top_image_carousel, 'Top Image Carousel'),
+ ("top_image_carousel", parse_top_image_carousel, "Top Image Carousel"),
]
-header_parser_dict = {i[0]:i[1] for i in header_parsers} # Format {type: function}
-header_parser_labels = {i[0]:i[2] for i in header_parsers} # Format {type: label}
+header_parser_dict = {i[0]: i[1] for i in header_parsers} # Format {type: function}
+header_parser_labels = {i[0]: i[2] for i in header_parsers} # Format {type: label}
# Component details dataframe
-columns = ['type', 'func', 'label']
+columns = ["type", "func", "label"]
main_parsers = [
- ('ad', parse_ads, 'Ad'),
- ('available_on', parse_available_on, 'Available On'),
- ('banner', parse_banner, 'Banner'),
- ('discussions_and_forums', parse_discussions_and_forums, 'Discussions & Forums'),
- ('general', parse_general_results, 'General'),
- ('general_questions', parse_general_questions, 'General Questions'),
- ('images', parse_images, 'Images'),
- ('knowledge', parse_knowledge_panel, 'Knowledge'),
- ('latest_from', parse_latest_from, 'Latest From'),
- ('local_news', parse_local_news, 'Local News'),
- ('local_results', parse_local_results, 'Local Results'),
- ('locations', parse_locations, 'Locations'),
- ('map_results', parse_map_results, 'Map Results'),
- ('news_quotes', parse_news_quotes, 'News Quotes'),
- ('people_also_ask', parse_people_also_ask, 'People Also Ask'),
- ('perspectives', parse_perspectives, 'Perspectives & Opinions'),
- ('recent_posts', parse_recent_posts, 'Recent Posts'),
- ('scholarly_articles', parse_scholarly_articles, 'Scholar Articles'),
- ('searches_related', parse_searches_related, 'Related Searches'),
- ('short_videos', parse_short_videos, 'Short Videos'),
- ('shopping_ads', parse_shopping_ads, 'Shopping Ad'),
- ('top_stories', parse_top_stories, 'Top Stories'),
- ('twitter_cards', parse_twitter_cards, 'Twitter Cards'),
- ('twitter_result', parse_twitter_result, 'Twitter Result'),
- ('videos', parse_videos, 'Videos'),
- ('view_more_news', parse_view_more_news, 'View More News'),
- ('knowledge_rhs', parse_knowledge_rhs, 'Knowledge RHS'),
+ ("ad", parse_ads, "Ad"),
+ ("available_on", parse_available_on, "Available On"),
+ ("banner", parse_banner, "Banner"),
+ ("discussions_and_forums", parse_discussions_and_forums, "Discussions & Forums"),
+ ("general", parse_general_results, "General"),
+ ("general_questions", parse_general_questions, "General Questions"),
+ ("images", parse_images, "Images"),
+ ("knowledge", parse_knowledge_panel, "Knowledge"),
+ ("latest_from", parse_latest_from, "Latest From"),
+ ("local_news", parse_local_news, "Local News"),
+ ("local_results", parse_local_results, "Local Results"),
+ ("locations", parse_locations, "Locations"),
+ ("map_results", parse_map_results, "Map Results"),
+ ("news_quotes", parse_news_quotes, "News Quotes"),
+ ("people_also_ask", parse_people_also_ask, "People Also Ask"),
+ ("perspectives", parse_perspectives, "Perspectives & Opinions"),
+ ("recent_posts", parse_recent_posts, "Recent Posts"),
+ ("scholarly_articles", parse_scholarly_articles, "Scholar Articles"),
+ ("searches_related", parse_searches_related, "Related Searches"),
+ ("short_videos", parse_short_videos, "Short Videos"),
+ ("shopping_ads", parse_shopping_ads, "Shopping Ad"),
+ ("top_stories", parse_top_stories, "Top Stories"),
+ ("twitter_cards", parse_twitter_cards, "Twitter Cards"),
+ ("twitter_result", parse_twitter_result, "Twitter Result"),
+ ("videos", parse_videos, "Videos"),
+ ("view_more_news", parse_view_more_news, "View More News"),
+ ("knowledge_rhs", parse_knowledge_rhs, "Knowledge RHS"),
]
-main_parser_dict = {i[0]:i[1] for i in main_parsers} # Format {type: function}
-main_parser_labels = {i[0]:i[2] for i in main_parsers} # Format {type: label}
+main_parser_dict = {i[0]: i[1] for i in main_parsers} # Format {type: function}
+main_parser_labels = {i[0]: i[2] for i in main_parsers} # Format {type: label}
# Footer parsers
footer_parsers = [
- ('img_cards', Footer.parse_image_cards, 'Image Cards'),
- ('searches_related', parse_searches_related, 'Related Searches'),
- ('discover_more', Footer.parse_discover_more, 'Discover More'),
- ('general', parse_general_results, 'General'),
- ('people_also_ask', parse_people_also_ask, 'People Also Ask'),
- ('omitted_notice', Footer.parse_omitted_notice, 'Omitted Notice'),
+ ("img_cards", Footer.parse_image_cards, "Image Cards"),
+ ("searches_related", parse_searches_related, "Related Searches"),
+ ("discover_more", Footer.parse_discover_more, "Discover More"),
+ ("general", parse_general_results, "General"),
+ ("people_also_ask", parse_people_also_ask, "People Also Ask"),
+ ("omitted_notice", Footer.parse_omitted_notice, "Omitted Notice"),
]
-footer_parser_dict = {i[0]:i[1] for i in footer_parsers} # Format {type: function}
-footer_parser_labels = {i[0]:i[2] for i in footer_parsers} # Format {type: label}
+footer_parser_dict = {i[0]: i[1] for i in footer_parsers} # Format {type: function}
+footer_parser_labels = {i[0]: i[2] for i in footer_parsers} # Format {type: label}
def parse_unknown(cmpt) -> list:
- parsed_result = {'type': cmpt.type,
- 'cmpt_rank': cmpt.cmpt_rank,
- 'text': cmpt.elem.get_text("<|>", strip=True) if cmpt.elem else None}
+ parsed_result = {
+ "type": cmpt.type,
+ "cmpt_rank": cmpt.cmpt_rank,
+ "text": cmpt.elem.get_text("<|>", strip=True) if cmpt.elem else None,
+ }
return [parsed_result]
def parse_not_implemented(cmpt) -> list:
"""Placeholder function for component parsers that are not implemented"""
- parsed_result = {'type': cmpt.type,
- 'cmpt_rank': cmpt.cmpt_rank,
- 'text': cmpt.elem.get_text("<|>", strip=True),
- 'error': 'not implemented'}
+ parsed_result = {
+ "type": cmpt.type,
+ "cmpt_rank": cmpt.cmpt_rank,
+ "text": cmpt.elem.get_text("<|>", strip=True),
+ "error": "not implemented",
+ }
return [parsed_result]
diff --git a/WebSearcher/component_parsers/ads.py b/WebSearcher/component_parsers/ads.py
index a67433e..4a401a9 100644
--- a/WebSearcher/component_parsers/ads.py
+++ b/WebSearcher/component_parsers/ads.py
@@ -1,8 +1,8 @@
-""" Parsers for ad components
+"""Parsers for ad components
Changelog
---------
-2024-05-08:
+2024-05-08:
- added new div class for text field
- added labels (e.g., "Provides abortions") from , appended to text field
@@ -11,8 +11,8 @@
"""
import bs4
-from .. import webutils
-from ..models.data import BaseResult, DetailsItem, DetailsList
+
+from .. import utils
from .shopping_ads import parse_shopping_ads
SUB_TYPES = [
@@ -25,37 +25,37 @@
]
AD_STANDARD_TEXT_SELECTORS = [
- ('div', {'class': 'yDYNvb'}),
- ('div', {'class': 'Va3FIb'}),
+ ("div", {"class": "yDYNvb"}),
+ ("div", {"class": "Va3FIb"}),
]
def classify_ad_type(cmpt: bs4.element.Tag) -> str:
"""Classify the type of ad component"""
label_divs = {
- "legacy": webutils.find_all_divs(cmpt, 'div', {'class': 'ad_cclk'}),
- "local_service": cmpt.find_all('gls-profile-entrypoint'),
- "secondary": webutils.find_all_divs(cmpt, 'div', {'class': 'd5oMvf'}),
- "shopping": webutils.find_all_divs(cmpt, 'div', {'class': 'commercial-unit-desktop-top'}),
- "standard": webutils.find_all_divs(cmpt, 'div', {'class': 'uEierd'}),
- "carousel": webutils.find_all_divs(cmpt, 'g-scrolling-carousel'),
+ "legacy": utils.find_all_divs(cmpt, "div", {"class": "ad_cclk"}),
+ "local_service": cmpt.find_all("gls-profile-entrypoint"),
+ "secondary": utils.find_all_divs(cmpt, "div", {"class": "d5oMvf"}),
+ "shopping": utils.find_all_divs(cmpt, "div", {"class": "commercial-unit-desktop-top"}),
+ "standard": utils.find_all_divs(cmpt, "div", {"class": "uEierd"}),
+ "carousel": utils.find_all_divs(cmpt, "g-scrolling-carousel"),
}
for label, divs in label_divs.items():
if divs:
return label
- return 'unknown'
+ return "unknown"
def parse_ads(cmpt: bs4.element.Tag) -> list:
"""Parse ads from ad component"""
subtype_parsers = {
- 'legacy': parse_ad_legacy,
- 'local_service': parse_ad_local_service,
- 'secondary': parse_ad_secondary,
- 'shopping': parse_ad_shopping,
- 'standard': parse_ad_standard,
- 'carousel': parse_ad_carousel,
+ "legacy": parse_ad_legacy,
+ "local_service": parse_ad_local_service,
+ "secondary": parse_ad_secondary,
+ "shopping": parse_ad_shopping,
+ "standard": parse_ad_standard,
+ "carousel": parse_ad_carousel,
}
parsed_list = []
sub_type = classify_ad_type(cmpt)
@@ -64,253 +64,250 @@ def parse_ads(cmpt: bs4.element.Tag) -> list:
parsed_list = parser(cmpt)
return parsed_list
+
# ------------------------------------------------------------------------------
+
def parse_ad_legacy(cmpt: bs4.element.Tag) -> list:
def _parse_ad_legacy(cmpt: bs4.element.Tag) -> list:
- subs = cmpt.find_all('li', {'class': 'ads-ad'})
+ subs = cmpt.find_all("li", {"class": "ads-ad"})
return [_parse_ad_legacy_sub(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
def _parse_ad_legacy_sub(sub: bs4.element.Tag, sub_rank: int) -> dict:
- header = sub.find('div', {'class': 'ad_cclk'})
- parsed = BaseResult(
- type='ad',
- sub_type='legacy',
- sub_rank=sub_rank,
- title=webutils.get_text(header, 'h3'),
- url=webutils.get_text(header, 'cite'),
- cite=None,
- text=webutils.get_text(sub, 'div', {'class': 'ads-creative'}),
- details=_parse_ad_legacy_sub_details(sub),
- error=None
- ).model_dump()
- return parsed
-
- def _parse_ad_legacy_sub_details(sub: bs4.element.Tag) -> list:
- details_list = DetailsList()
- bottom_text = sub.find('ul')
+ header = sub.find("div", {"class": "ad_cclk"})
+ return {
+ "type": "ad",
+ "sub_type": "legacy",
+ "sub_rank": sub_rank,
+ "title": utils.get_text(header, "h3"),
+ "url": utils.get_text(header, "cite"),
+ "cite": None,
+ "text": utils.get_text(sub, "div", {"class": "ads-creative"}),
+ "details": _parse_ad_legacy_sub_details(sub),
+ }
+
+ def _parse_ad_legacy_sub_details(sub: bs4.element.Tag) -> dict | None:
+ items = []
+ bottom_text = sub.find("ul")
if bottom_text:
- for li in bottom_text.find_all('li'):
- details_list.append(DetailsItem(text=li.get_text(separator=' ')))
- return details_list.to_dicts()
+ for li in bottom_text.find_all("li"):
+ items.append(li.get_text(separator=" "))
+ return {"type": "text", "items": items} if items else None
return _parse_ad_legacy(cmpt)
+
# ------------------------------------------------------------------------------
+
def parse_ad_local_service(cmpt: bs4.element.Tag) -> list:
"""Parse local service ads (gls-profile-entrypoint elements)"""
def _parse_profile(profile: bs4.element.Tag, sub_rank: int) -> dict:
- title = webutils.get_text(profile, 'span', {'class': 'bk5vhd'})
- url = webutils.get_link(profile)
-
- detail_rows = profile.find_all('div', {'class': 'P4vvKf'})
- text = ' · '.join(
- row.get_text(' ', strip=True) for row in detail_rows
- ) if detail_rows else None
-
- details = DetailsList()
- rating_span = profile.find('span', attrs={'aria-label': True})
+ title = utils.get_text(profile, "span", {"class": "bk5vhd"})
+ url = utils.get_link(profile)
+
+ detail_rows = profile.find_all("div", {"class": "P4vvKf"})
+ text = (
+ " · ".join(row.get_text(" ", strip=True) for row in detail_rows)
+ if detail_rows
+ else None
+ )
+
+ details = None
+ rating_span = profile.find("span", attrs={"aria-label": True})
if rating_span:
- details.append(DetailsItem(text=rating_span['aria-label']))
-
- return BaseResult(
- type='ad',
- sub_type='local_service',
- sub_rank=sub_rank,
- title=title,
- url=url,
- cite=None,
- text=text,
- details=details.to_dicts(),
- error=None
- ).model_dump()
-
- profiles = cmpt.find_all('gls-profile-entrypoint')
+ details = {"type": "text", "items": [rating_span["aria-label"]]}
+
+ return {
+ "type": "ad",
+ "sub_type": "local_service",
+ "sub_rank": sub_rank,
+ "title": title,
+ "url": url,
+ "cite": None,
+ "text": text,
+ "details": details,
+ }
+
+ profiles = cmpt.find_all("gls-profile-entrypoint")
return [_parse_profile(p, i) for i, p in enumerate(profiles)]
+
# ------------------------------------------------------------------------------
+
def parse_ad_secondary(cmpt: bs4.element.Tag) -> list:
def _parse_ad_secondary(cmpt: bs4.element.Tag) -> list:
- subs = cmpt.find_all('li', {'class': 'ads-fr'})
+ subs = cmpt.find_all("li", {"class": "ads-fr"})
return [_parse_ad_secondary_sub(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
def _parse_ad_secondary_sub(sub: bs4.element.Tag, sub_rank: int) -> dict:
- return BaseResult(
- type='ad',
- sub_type='secondary',
- sub_rank=sub_rank,
- title=webutils.get_text(sub, 'div', {'role': 'heading'}),
- url=_parse_ad_secondary_sub_url(sub),
- cite=webutils.get_text(sub, 'span', {'class': 'gBIQub'}),
- text=_parse_ad_secondary_sub_text(sub),
- details=_parse_ad_secondary_sub_details(sub),
- error=None
- ).model_dump()
-
+ return {
+ "type": "ad",
+ "sub_type": "secondary",
+ "sub_rank": sub_rank,
+ "title": utils.get_text(sub, "div", {"role": "heading"}),
+ "url": _parse_ad_secondary_sub_url(sub),
+ "cite": utils.get_text(sub, "span", {"class": "gBIQub"}),
+ "text": _parse_ad_secondary_sub_text(sub),
+ "details": _parse_ad_secondary_sub_details(sub),
+ }
+
def _parse_ad_secondary_sub_url(sub: bs4.element.Tag) -> str:
- url_div = webutils.get_div(sub, 'div', {'class': 'd5oMvf'})
- return webutils.get_link(url_div)
+ url_div = utils.get_div(sub, "div", {"class": "d5oMvf"})
+ return utils.get_link(url_div)
def _parse_ad_secondary_sub_text(sub) -> str:
- text_divs = sub.find_all('div', {'class': 'yDYNvb'})
- return '|'.join([d.text for d in text_divs]) if text_divs else ''
+ text_divs = sub.find_all("div", {"class": "yDYNvb"})
+ return "|".join([d.text for d in text_divs]) if text_divs else ""
- def _parse_ad_secondary_sub_details(sub: bs4.element.Tag) -> list:
- for selector in [{'role': 'list'}, {'class': 'bOeY0b'}]:
- details_section = sub.find('div', selector)
+ def _parse_ad_secondary_sub_details(sub: bs4.element.Tag) -> dict | None:
+ for selector in [{"role": "list"}, {"class": "bOeY0b"}]:
+ details_section = sub.find("div", selector)
if details_section:
- urls = webutils.get_link_list(details_section)
+ urls = utils.get_link_list(details_section)
if urls:
- details_list = DetailsList()
- for url in urls:
- details_list.append(DetailsItem(url=url))
- return details_list.to_dicts()
+ return {"type": "links", "items": urls}
return None
-
+
return _parse_ad_secondary(cmpt)
# ------------------------------------------------------------------------------
+
def parse_ad_shopping(cmpt: bs4.element.Tag) -> list:
"""Parse shopping ads from component"""
- subs = webutils.find_all_divs(cmpt, 'div', {'class': 'commercial-unit-desktop-top'})
+ subs = utils.find_all_divs(cmpt, "div", {"class": "commercial-unit-desktop-top"})
parsed_list = []
for sub in subs:
parsed_list.extend(parse_shopping_ads(sub))
return parsed_list
+
# ------------------------------------------------------------------------------
+
def parse_ad_standard(cmpt: bs4.element.Tag) -> list:
"""Parse standard ads from component"""
def _parse_ad_standard_sub(sub: bs4.element.Tag, sub_rank: int = 0) -> dict:
def _parse_ad_standard_text(sub: bs4.element.Tag) -> str:
- text = webutils.get_text_by_selectors(sub, AD_STANDARD_TEXT_SELECTORS)
- label = webutils.get_text(sub, 'span', {'class': 'mXsQRe'})
+ text = utils.get_text_by_selectors(sub, AD_STANDARD_TEXT_SELECTORS)
+ label = utils.get_text(sub, "span", {"class": "mXsQRe"})
return f"{text} {label} " if label else text
-
+
submenu = parse_ad_menu(sub)
- sub_type = 'submenu' if submenu else 'standard'
- parsed = BaseResult(
- type='ad',
- sub_type=sub_type,
- sub_rank=sub_rank,
- title=webutils.get_text(sub, 'div', {'role': 'heading'}),
- url=webutils.get_link(sub, {'class': 'sVXRqc'}),
- cite=webutils.get_text(sub, 'span', {'role': 'text'}),
- text=_parse_ad_standard_text(sub),
- details=submenu,
- error=None
- ).model_dump()
- return parsed
-
- subs = webutils.find_all_divs(cmpt, 'div', {'class': 'uEierd'})
+ sub_type = "submenu" if submenu else "standard"
+ return {
+ "type": "ad",
+ "sub_type": sub_type,
+ "sub_rank": sub_rank,
+ "title": utils.get_text(sub, "div", {"role": "heading"}),
+ "url": utils.get_link(sub, {"class": "sVXRqc"}),
+ "cite": utils.get_text(sub, "span", {"role": "text"}),
+ "text": _parse_ad_standard_text(sub),
+ "details": submenu,
+ }
+
+ subs = utils.find_all_divs(cmpt, "div", {"class": "uEierd"})
return [_parse_ad_standard_sub(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
-def parse_ad_menu(sub: bs4.element.Tag) -> list:
+def parse_ad_menu(sub: bs4.element.Tag) -> dict | None:
"""Parse menu items for a large ad with additional subresults"""
- parsed_items = DetailsList()
+ items = []
# Format 1: MhgNwc items with MUxGbd sub-divs
- menu_items = sub.find_all('div', {'class': 'MhgNwc'})
+ menu_items = sub.find_all("div", {"class": "MhgNwc"})
for item in menu_items:
- parsed_item = DetailsItem()
- item_divs = item.find_all('div', {'class': 'MUxGbd'})
+ parsed_item = {"url": "", "title": "", "text": ""}
+ item_divs = item.find_all("div", {"class": "MUxGbd"})
for div in item_divs:
- if webutils.check_dict_value(div.attrs, 'role', 'listitem'):
- parsed_item.url = webutils.get_link(div) or ''
- parsed_item.title = webutils.get_text(div) or ''
+ if utils.check_dict_value(div.attrs, "role", "listitem"):
+ parsed_item["url"] = utils.get_link(div) or ""
+ parsed_item["title"] = utils.get_text(div) or ""
else:
- parsed_item.text = webutils.get_text(div) or ''
- parsed_items.append(parsed_item)
+ parsed_item["text"] = utils.get_text(div) or ""
+ items.append(parsed_item)
# Format 2: bOeY0b sitelinks section
- if not parsed_items:
- sitelink_div = sub.find('div', {'class': 'bOeY0b'})
+ if not items:
+ sitelink_div = sub.find("div", {"class": "bOeY0b"})
if sitelink_div:
- for link in sitelink_div.find_all('a', href=True):
+ for link in sitelink_div.find_all("a", href=True):
text = link.get_text(strip=True)
- href = link.get('href', '')
+ href = link.get("href", "")
if text and href:
- parsed_items.append(DetailsItem(url=href, title=text))
+ items.append({"url": href, "title": text})
- return parsed_items.to_dicts()
+ return {"type": "menu", "items": items} if items else None
# ------------------------------------------------------------------------------
+
def parse_ad_carousel(
- cmpt: bs4.element.Tag,
- sub_type: str = 'carousel',
- filter_visible: bool = True
- ) -> list:
+ cmpt: bs4.element.Tag, sub_type: str = "carousel", filter_visible: bool = True
+) -> list:
def is_visible_div(sub: bs4.element.Tag) -> bool:
"""Check if carousel div is visible"""
- return not (sub.has_attr('data-has-shown') and sub['data-has-shown'] == 'false')
+ return not (sub.has_attr("data-has-shown") and sub["data-has-shown"] == "false")
def is_visible_card(sub: bs4.element.Tag) -> bool:
"""Check if carousel card is visible"""
- return not (sub.has_attr('data-viewurl') and sub['data-viewurl'])
+ return not (sub.has_attr("data-viewurl") and sub["data-viewurl"])
def parse_ad_carousel_div(sub: bs4.element.Tag, sub_type: str, sub_rank: int) -> dict:
"""Parse ad carousel div, seen 2025-02-06"""
- return BaseResult(
- type='ad',
- sub_type=sub_type,
- sub_rank=sub_rank,
- title=webutils.get_text(sub, 'div', {'class': 'e7SMre'}),
- url=webutils.get_link(sub),
- text=webutils.get_text(sub, 'div', {'class': 'vrAZpb'}),
- cite=webutils.get_text(sub, 'div', {'class': 'zpIwr'}),
- details=None,
- error=None
- ).model_dump()
+ return {
+ "type": "ad",
+ "sub_type": sub_type,
+ "sub_rank": sub_rank,
+ "title": utils.get_text(sub, "div", {"class": "e7SMre"}),
+ "url": utils.get_link(sub),
+ "text": utils.get_text(sub, "div", {"class": "vrAZpb"}),
+ "cite": utils.get_text(sub, "div", {"class": "zpIwr"}),
+ }
def parse_ad_carousel_card(sub: bs4.element.Tag, sub_type: str, sub_rank: int) -> dict:
"""Parse ad carousel card, seen 2024-09-21"""
- return BaseResult(
- type='ad',
- sub_type=sub_type,
- sub_rank=sub_rank,
- title=webutils.get_text(sub, 'div', {'class': 'gCv54b'}),
- url=webutils.get_link(sub, {'class': 'KTsHxd'}),
- text=webutils.get_text(sub, 'div', {'class': 'VHpBje'}),
- cite=webutils.get_text(sub, 'div', {'class': 'j958Pd'}),
- details=None,
- error=None
- ).model_dump()
+ return {
+ "type": "ad",
+ "sub_type": sub_type,
+ "sub_rank": sub_rank,
+ "title": utils.get_text(sub, "div", {"class": "gCv54b"}),
+ "url": utils.get_link(sub, {"class": "KTsHxd"}),
+ "text": utils.get_text(sub, "div", {"class": "VHpBje"}),
+ "cite": utils.get_text(sub, "div", {"class": "j958Pd"}),
+ }
# Possible ad carousel item types
output_list = []
- ad_carousel = cmpt.find('g-scrolling-carousel')
+ ad_carousel = cmpt.find("g-scrolling-carousel")
if ad_carousel:
ad_carousel_types = {
- 'carousel_card': webutils.find_all_divs(ad_carousel, name='g-inner-card'),
- 'carousel_div': webutils.find_all_divs(ad_carousel, name='div', attrs={'class': 'ZPze1e'})
+ "carousel_card": utils.find_all_divs(ad_carousel, name="g-inner-card"),
+ "carousel_div": utils.find_all_divs(ad_carousel, name="div", attrs={"class": "ZPze1e"}),
}
for ad_carousel_type, sub_cmpts in ad_carousel_types.items():
if sub_cmpts:
for sub_rank, sub in enumerate(sub_cmpts):
- if ad_carousel_type == 'carousel_card':
+ if ad_carousel_type == "carousel_card":
if filter_visible and not is_visible_card(sub):
continue
output = parse_ad_carousel_card(sub, sub_type, sub_rank)
- elif ad_carousel_type == 'carousel_div':
+ elif ad_carousel_type == "carousel_div":
if filter_visible and not is_visible_div(sub):
continue
output = parse_ad_carousel_div(sub, sub_type, sub_rank)
output_list.append(output)
-
+
return output_list
diff --git a/WebSearcher/component_parsers/available_on.py b/WebSearcher/component_parsers/available_on.py
index dd843ee..8cac364 100644
--- a/WebSearcher/component_parsers/available_on.py
+++ b/WebSearcher/component_parsers/available_on.py
@@ -1,6 +1,3 @@
-from ..models.data import DetailsItem, DetailsList
-
-
def parse_available_on(cmpt, sub_rank=0) -> list:
"""Parse an available component
@@ -13,28 +10,28 @@ def parse_available_on(cmpt, sub_rank=0) -> list:
Returns:
dict : parsed component
"""
- parsed = {'type': 'available_on', 'sub_rank': sub_rank}
+ parsed = {"type": "available_on", "sub_rank": sub_rank}
- parsed['title'] = cmpt.find('span', {'class': 'GzssTd'}).text
+ parsed["title"] = cmpt.find("span", {"class": "GzssTd"}).text
- details = DetailsList()
- for o in cmpt.find_all('div', {'class': 'kno-fb-ctx'}):
- details.append(parse_available_on_item(o))
- parsed['details'] = details.to_dicts()
+ items = []
+ for o in cmpt.find_all("div", {"class": "kno-fb-ctx"}):
+ items.append(parse_available_on_item(o))
+ parsed["details"] = {"type": "providers", "items": items} if items else None
return [parsed]
-def parse_available_on_item(sub) -> DetailsItem:
+def parse_available_on_item(sub) -> dict:
"""Parse an available on item
Args:
sub (bs4 object): An available on option element
Returns:
- DetailsItem : parsed item with title, url, and cost in misc
+ dict : parsed item with title, url, and cost
"""
- return DetailsItem(
- title=sub.find('div', {'class': 'i3LlFf'}).text,
- url=sub.find('a')['href'],
- misc={'cost': sub.find('div', {'class': 'V8xno'}).text},
- )
+ return {
+ "title": sub.find("div", {"class": "i3LlFf"}).text,
+ "url": sub.find("a")["href"],
+ "cost": sub.find("div", {"class": "V8xno"}).text,
+ }
diff --git a/WebSearcher/component_parsers/banner.py b/WebSearcher/component_parsers/banner.py
index a229441..70f5045 100644
--- a/WebSearcher/component_parsers/banner.py
+++ b/WebSearcher/component_parsers/banner.py
@@ -3,7 +3,7 @@ def parse_banner(cmpt) -> list:
Args:
cmpt (bs4 object): A search suggestion component
-
+
Returns:
list: List of BannerResult objects, with the main component and its subcomponents
"""
@@ -11,27 +11,28 @@ def parse_banner(cmpt) -> list:
# Header subcomponent
banner_result_header = {
- 'type': 'banner',
- 'sub_type': 'header',
- 'sub_rank': 0,
- 'title': _get_result_text(cmpt, '.v3jTId'),
- 'text': _get_result_text(cmpt, '.Cy9gW'),
+ "type": "banner",
+ "sub_type": "header",
+ "sub_rank": 0,
+ "title": _get_result_text(cmpt, ".v3jTId"),
+ "text": _get_result_text(cmpt, ".Cy9gW"),
}
parsed_list.append(banner_result_header)
# Suggestion subcomponents
- for i, suggestion in enumerate(cmpt.select('.TjBpC')):
+ for i, suggestion in enumerate(cmpt.select(".TjBpC")):
banner_result_suggestion = {
- 'type': 'banner',
- 'sub_type': 'suggestion',
- 'sub_rank': i + 1,
- 'title': _get_result_text(suggestion, '.AbPV3'),
- 'url': suggestion.get('href')
+ "type": "banner",
+ "sub_type": "suggestion",
+ "sub_rank": i + 1,
+ "title": _get_result_text(suggestion, ".AbPV3"),
+ "url": suggestion.get("href"),
}
parsed_list.append(banner_result_suggestion)
return parsed_list
+
def _get_result_text(cmpt, selector) -> str:
if cmpt.select_one(selector):
return cmpt.select_one(selector).get_text(strip=True)
diff --git a/WebSearcher/component_parsers/discussions_and_forums.py b/WebSearcher/component_parsers/discussions_and_forums.py
index 8cd06bc..c1b9012 100644
--- a/WebSearcher/component_parsers/discussions_and_forums.py
+++ b/WebSearcher/component_parsers/discussions_and_forums.py
@@ -1,14 +1,15 @@
-from .. import webutils
import bs4
+from .. import utils
+
TITLE_SELECTORS = [
- ('div', {'class': 'zNWc4c'}),
- ('div', {'class': 'qyp6xb'}),
+ ("div", {"class": "zNWc4c"}),
+ ("div", {"class": "qyp6xb"}),
]
CITE_SELECTORS = [
- ('div', {'class': 'LbKnXb'}),
- ('div', {'class': 'VZGVuc'}),
+ ("div", {"class": "LbKnXb"}),
+ ("div", {"class": "VZGVuc"}),
]
SUB_SELECTORS = [
@@ -41,20 +42,19 @@ def parse_item(cmpt: bs4.element.Tag, sub_rank: int = 0) -> dict:
def get_title(sub):
"""Get title from selectors or heading div"""
- title = webutils.get_text_by_selectors(sub, TITLE_SELECTORS)
+ title = utils.get_text_by_selectors(sub, TITLE_SELECTORS)
if not title:
- title = webutils.get_text(sub, 'div', {'role': 'heading'})
+ title = utils.get_text(sub, "div", {"role": "heading"})
return title
def get_cite(sub):
"""Get cite from selectors"""
- return webutils.get_text_by_selectors(sub, CITE_SELECTORS)
+ return utils.get_text_by_selectors(sub, CITE_SELECTORS)
def get_url(sub):
"""Get URL from a subcomponent; try multiple, take first non-null"""
- url_list = [webutils.get_link(sub, {"class": "v4kUNc"}),
- webutils.get_link(sub)]
+ url_list = [utils.get_link(sub, {"class": "v4kUNc"}), utils.get_link(sub)]
url_list = [url for url in url_list if url]
return url_list[0] if url_list else None
diff --git a/WebSearcher/component_parsers/footer.py b/WebSearcher/component_parsers/footer.py
index 2b858a7..db205c3 100644
--- a/WebSearcher/component_parsers/footer.py
+++ b/WebSearcher/component_parsers/footer.py
@@ -1,38 +1,33 @@
-from .. import webutils
-from ..models.data import DetailsItem, DetailsList
+from .. import utils
-class Footer:
+class Footer:
@staticmethod
def parse_image_cards(elem) -> list:
- subs = webutils.find_all_divs(elem, 'div', {'class':'g'})
+ subs = utils.find_all_divs(elem, "div", {"class": "g"})
return [Footer.parse_image_card(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
@staticmethod
def parse_image_card(sub, sub_rank=0) -> dict:
- parsed = {'type':'img_cards', 'sub_rank':sub_rank}
- parsed['title'] = webutils.get_text(sub, "div", {'aria-level':"3", "role":"heading"})
- images = sub.find_all('img')
+ parsed = {"type": "img_cards", "sub_rank": sub_rank}
+ parsed["title"] = utils.get_text(sub, "div", {"aria-level": "3", "role": "heading"})
+ images = sub.find_all("img")
if images:
- details = DetailsList()
- for i in images:
- details.append(DetailsItem(url=i['src'], text=i['alt']))
- parsed['details'] = details.to_dicts()
+ items = [{"url": i["src"], "text": i["alt"]} for i in images]
+ parsed["details"] = {"type": "hyperlinks", "items": items}
return parsed
@staticmethod
def parse_discover_more(elem) -> list:
- carousel = elem.find('g-scrolling-carousel')
- return [{
- 'type':'discover_more',
- 'sub_rank':0,
- 'text': '|'.join(c.text for c in carousel.find_all('g-inner-card'))
- }]
+ carousel = elem.find("g-scrolling-carousel")
+ return [
+ {
+ "type": "discover_more",
+ "sub_rank": 0,
+ "text": "|".join(c.text for c in carousel.find_all("g-inner-card")),
+ }
+ ]
@staticmethod
def parse_omitted_notice(elem) -> list:
- return [{
- 'type':'omitted_notice',
- 'sub_rank':0,
- 'text': webutils.get_text(elem)
- }]
\ No newline at end of file
+ return [{"type": "omitted_notice", "sub_rank": 0, "text": utils.get_text(elem)}]
diff --git a/WebSearcher/component_parsers/general.py b/WebSearcher/component_parsers/general.py
index 8633fee..ded6a5d 100644
--- a/WebSearcher/component_parsers/general.py
+++ b/WebSearcher/component_parsers/general.py
@@ -1,6 +1,7 @@
import re
-from ..models.data import DetailsItem, DetailsList
-from ..webutils import get_text, get_link
+
+from ..utils import get_link, get_text
+
def parse_general_results(cmpt) -> list:
"""Parse a general component
@@ -23,51 +24,51 @@ def find_subcomponents(cmpt) -> list:
"""Find subcomponents within a general component, trying known formats"""
# Standard format
- subs = cmpt.find_all('div', {'class': 'g'})
+ subs = cmpt.find_all("div", {"class": "g"})
if subs:
- parent_g = cmpt.find('div', {'class': 'g'})
- if parent_g and parent_g.find_all('div', {'class': 'g'}):
+ parent_g = cmpt.find("div", {"class": "g"})
+ if parent_g and parent_g.find_all("div", {"class": "g"}):
return [parent_g] # Nested .g dedup
return subs
# Sub-results format (2023+)
- additional = cmpt.find_all('div', {'class': 'd4rhi'})
+ additional = cmpt.find_all("div", {"class": "d4rhi"})
if additional:
- return [cmpt.find('div')] + additional
+ return [cmpt.find("div")] + additional
# Video results
- subs = cmpt.find_all('div', {'class': 'PmEWq'})
+ subs = cmpt.find_all("div", {"class": "PmEWq"})
if subs:
return subs
# Fallback: treat entire component as single result
return [cmpt]
-
+
def parse_general_result(sub, sub_rank=0) -> dict:
"""Parse a general subcomponent
-
+
Args:
sub (bs4 object): A general subcomponent
-
+
Returns:
dict : parsed subresult
"""
-
+
if is_general_video(sub):
return parse_general_video(sub, sub_rank=sub_rank)
# Get title and text body divs
- title_div = sub.find('div', {'class':'rc'}) or sub.find('div', {'class':'yuRUbf'})
- body_div = sub.find('span', {'class':'st'}) or sub.find('div', {'class': 'VwiC3b'})
+ title_div = sub.find("div", {"class": "rc"}) or sub.find("div", {"class": "yuRUbf"})
+ body_div = sub.find("span", {"class": "st"}) or sub.find("div", {"class": "VwiC3b"})
parsed = {
- 'type': 'general',
- 'sub_rank': sub_rank,
- 'title': get_text(title_div, 'h3') if title_div else None,
- 'url': get_link(title_div) if title_div else None,
- 'text': get_text(body_div) if body_div else None,
- 'cite': get_text(sub, 'cite')
+ "type": "general",
+ "sub_rank": sub_rank,
+ "title": get_text(title_div, "h3") if title_div else None,
+ "url": get_link(title_div) if title_div else None,
+ "text": get_text(body_div) if body_div else None,
+ "cite": get_text(sub, "cite"),
}
# Get subtype details
@@ -76,15 +77,15 @@ def parse_general_result(sub, sub_rank=0) -> dict:
def parse_alink(a):
- return DetailsItem(url=a.attrs['href'], text=a.text)
+ return {"url": a.attrs["href"], "text": a.text}
def parse_alink_list(alinks):
- details = DetailsList()
+ items = []
for a in alinks:
- if 'href' in a.attrs:
- details.append(parse_alink(a))
- return details.to_dicts()
+ if "href" in a.attrs:
+ items.append(parse_alink(a))
+ return items
def parse_subtype_details(sub, parsed) -> dict:
@@ -93,65 +94,71 @@ def parse_subtype_details(sub, parsed) -> dict:
details = {}
# If top menu with children, ignore URLs and get correct title URL
- top_menu = sub.find('div', {'class':'yWc32e'})
+ top_menu = sub.find("div", {"class": "yWc32e"})
if top_menu:
has_children = list(top_menu.children)
- if has_children:
+ if has_children:
for child in top_menu.children:
child.decompose()
- if sub.find('h3'):
- parsed['url'] = sub.find('h3').find('a')['href']
+ if sub.find("h3"):
+ parsed["url"] = sub.find("h3").find("a")["href"]
# Subtype specific detail parsing
- if 'class' in sub.attrs:
- if 'd4rhi' in sub.attrs.get('class', []):
- parsed['sub_type'] = 'subresult'
-
+ if "class" in sub.attrs:
+ if "d4rhi" in sub.attrs.get("class", []):
+ parsed["sub_type"] = "subresult"
+
# Submenu - rating
- elif sub.find('g-review-stars'):
- parsed['sub_type'] = 'submenu_rating'
- sibling = sub.find('g-review-stars').next_sibling
+ elif sub.find("g-review-stars"):
+ parsed["sub_type"] = "submenu_rating"
+ sibling = sub.find("g-review-stars").next_sibling
if sibling:
text = str(sibling).strip()
if len(text):
- ratings = parse_ratings(text.split('-'))
+ ratings = parse_ratings(text.split("-"))
details.update(ratings)
-
+ details["type"] = "review"
+
# Submenu - list format
- elif sub.find('div', {'class': ['P1usbc', 'IThcWe']}):
- parsed['sub_type'] = 'submenu'
- submenu_div = sub.find('div', {'class': ['P1usbc', 'IThcWe']})
+ elif sub.find("div", {"class": ["P1usbc", "IThcWe"]}):
+ parsed["sub_type"] = "submenu"
+ submenu_div = sub.find("div", {"class": ["P1usbc", "IThcWe"]})
if submenu_div:
- alinks = submenu_div.find_all('a')
- details['links'] = parse_alink_list(alinks)
+ alinks = submenu_div.find_all("a")
+ details["type"] = "hyperlinks"
+ details["items"] = parse_alink_list(alinks)
# Submenu - table format
- elif sub.find('table'):
- parsed['sub_type'] = 'submenu'
- alinks = sub.find('table').find_all('a')
- details['links'] = parse_alink_list(alinks)
+ elif sub.find("table"):
+ parsed["sub_type"] = "submenu"
+ alinks = sub.find("table").find_all("a")
+ details["type"] = "hyperlinks"
+ details["items"] = parse_alink_list(alinks)
# Mini submenu
- elif sub.find('div', {'class': ['osl', 'jYOxx']}):
- parsed['sub_type'] = 'submenu_mini'
- alinks = sub.find('div', {'class':['osl','jYOxx']}).find_all('a')
- details['links'] = parse_alink_list(alinks)
+ elif sub.find("div", {"class": ["osl", "jYOxx"]}):
+ parsed["sub_type"] = "submenu_mini"
+ alinks = sub.find("div", {"class": ["osl", "jYOxx"]}).find_all("a")
+ details["type"] = "hyperlinks"
+ details["items"] = parse_alink_list(alinks)
- elif sub.find('div', {'class': re.compile('fG8Fp')}):
+ elif sub.find("div", {"class": re.compile("fG8Fp")}):
# Scholar results
- alinks = sub.find('div', {'class': re.compile('fG8Fp')}).find_all('a')
- if len(alinks) and 'Cited by' in alinks[0].text:
- parsed['sub_type'] = 'submenu_scholarly'
- details['links'] = parse_alink_list(alinks)
+ alinks = sub.find("div", {"class": re.compile("fG8Fp")}).find_all("a")
+ if len(alinks) and "Cited by" in alinks[0].text:
+ parsed["sub_type"] = "submenu_scholarly"
+ details["type"] = "hyperlinks"
+ details["items"] = parse_alink_list(alinks)
# Product results
- text = get_text(sub, 'div', {'class': re.compile('fG8Fp')})
- if not alinks and '$' in text:
- parsed['sub_type'] = 'submenu_product'
- product_details = parse_product(text)
+ text = get_text(sub, "div", {"class": re.compile("fG8Fp")})
+ if not alinks and "$" in text:
+ parsed["sub_type"] = "submenu_product"
+ product_details = parse_product(text)
details.update(product_details)
-
- parsed['details'] = details if details else None
+ details["type"] = "product"
+
+ parsed["details"] = details if details else None
return parsed
@@ -159,47 +166,49 @@ def parse_ratings(text) -> dict:
"""Parse ratings that appear below some general components"""
text = [t.strip() for t in text]
- numeric = re.compile(r'^\d*[.]?\d*$')
- rating = re.split('Rating: ', text[0])[-1]
+ numeric = re.compile(r"^\d*[.]?\d*$")
+ rating = re.split("Rating: ", text[0])[-1]
if numeric.match(rating):
- details = {'rating': float(rating)}
+ details = {"rating": float(rating)}
else:
- details = {'rating': rating}
-
+ details = {"rating": rating}
+
if len(text) > 1:
- str_match_0 = re.compile(' vote[s]?| review[s]?')
- str_match_1 = re.compile('Review by')
+ str_match_0 = re.compile(" vote[s]?| review[s]?")
+ str_match_1 = re.compile("Review by")
if str_match_0.search(text[1]):
reviews = re.split(str_match_0, text[1])[0]
- reviews = reviews.replace(',','')[1:] # [1:] drops unicode char
- details['reviews'] = int(reviews)
+ reviews = reviews.replace(",", "")[1:] # [1:] drops unicode char
+ details["reviews"] = int(reviews)
elif str_match_1.search(text[1]):
- details['reviews'] = 1
-
+ details["reviews"] = 1
+
# could parse other fields
# (price, os, category) for products
# (time, cals) for recipes
return details
+
def parse_product(text) -> dict:
"""Parse price and stock that appears below some general components"""
- split_match = re.compile('-|·')
+ split_match = re.compile("-|·")
text = re.split(split_match, text)
if len(text) == 1:
- return {'price': text[0].strip()[1:]}
+ return {"price": text[0].strip()[1:]}
else:
- return {'price': text[0].strip()[1:], 'stock': text[1].strip()[1:]}
+ return {"price": text[0].strip()[1:], "stock": text[1].strip()[1:]}
# ------------------------------------------------------------------------------
# General Video Results
+
def is_general_video(cmpt):
"""Check for a unique class name specific to video results"""
- class_list = cmpt.get('class', [])
- return 'PmEWq' in class_list
+ class_list = cmpt.get("class", [])
+ return "PmEWq" in class_list
def parse_general_video(sub, sub_rank: int = 0) -> dict:
@@ -207,19 +216,19 @@ def parse_general_video(sub, sub_rank: int = 0) -> dict:
Args:
cmpt (bs4 object): A general video component
-
+
Returns:
VideoResult: Parsed information of the video
"""
return {
- 'type': 'general',
- 'sub_type': 'video',
- 'sub_rank': sub_rank,
- 'title': get_result_text(sub, 'h3.LC20lb'),
- 'url': sub.select_one('a[href]').get('href', '') if sub.select_one('a[href]') else None,
- 'text': get_result_text(sub, '.ITZIwc'),
- 'cite': get_result_text(sub, 'cite', strip=False),
- 'details': get_result_details(sub),
+ "type": "general",
+ "sub_type": "video",
+ "sub_rank": sub_rank,
+ "title": get_result_text(sub, "h3.LC20lb"),
+ "url": sub.select_one("a[href]").get("href", "") if sub.select_one("a[href]") else None,
+ "text": get_result_text(sub, ".ITZIwc"),
+ "cite": get_result_text(sub, "cite", strip=False),
+ "details": get_result_details(sub),
}
@@ -229,6 +238,9 @@ def get_result_text(cmpt, selector, strip=True):
def get_result_details(cmpt):
- details = {"source": get_result_text(cmpt, '.gqF9jc', strip=False),
- "duration": get_result_text(cmpt, '.JIv15d')}
- return details
\ No newline at end of file
+ details = {
+ "type": "video",
+ "source": get_result_text(cmpt, ".gqF9jc", strip=False),
+ "duration": get_result_text(cmpt, ".JIv15d"),
+ }
+ return details
diff --git a/WebSearcher/component_parsers/general_questions.py b/WebSearcher/component_parsers/general_questions.py
index 71e1f8b..49d7b8b 100644
--- a/WebSearcher/component_parsers/general_questions.py
+++ b/WebSearcher/component_parsers/general_questions.py
@@ -1,21 +1,22 @@
from .general import parse_general_results
from .people_also_ask import parse_people_also_ask
+
def parse_general_questions(cmpt) -> list:
"""Parse a General + People Also Ask hybrid component
These components consist of a general result followed by a people also
ask component with 3 subresults (questions).
-
+
Args:
cmpt (bs4 object): A latest from component
-
+
Returns:
dict : parsed result
"""
parsed_list_general = parse_general_results(cmpt)
parsed_list_ppa = parse_people_also_ask(cmpt)
- parsed_list_general[0]['details'] = parsed_list_ppa[0].get('details', None)
- parsed_list_general[0]['type'] = 'general_questions'
+ parsed_list_general[0]["details"] = parsed_list_ppa[0].get("details", None)
+ parsed_list_general[0]["type"] = "general_questions"
return parsed_list_general
diff --git a/WebSearcher/component_parsers/images.py b/WebSearcher/component_parsers/images.py
index ac4907c..21ee3dd 100644
--- a/WebSearcher/component_parsers/images.py
+++ b/WebSearcher/component_parsers/images.py
@@ -1,39 +1,47 @@
-""" Parsers for image components
+"""Parsers for image components
Changelog
2025-04-28: added div subcomponent class and sub_type labels
"""
-from ..webutils import get_text, get_link, get_div
+from ..utils import get_div, get_link, get_text
+
def parse_images(cmpt) -> list:
"""Parse an images component"""
parsed_list = []
- if cmpt.find('g-expandable-container'):
+ if cmpt.find("g-expandable-container"):
# Small images: thumbnails with text labels
- subs = cmpt.find_all('a', {'class': 'dgdd6c'})
+ subs = cmpt.find_all("a", {"class": "dgdd6c"})
parsed_subs = [parse_image_small(div, sub_rank) for sub_rank, div in enumerate(subs)]
parsed_list.extend(parsed_subs)
- if cmpt.find('g-scrolling-carousel'):
+ if cmpt.find("g-scrolling-carousel"):
# Medium images or video previews, no text labels
- subs = cmpt.find_all('div', {'class':'eA0Zlc'})
- parsed_subs = [parse_image_multimedia(sub, sub_rank + len(parsed_list)) for sub_rank, sub in enumerate(subs)]
+ subs = cmpt.find_all("div", {"class": "eA0Zlc"})
+ parsed_subs = [
+ parse_image_multimedia(sub, sub_rank + len(parsed_list))
+ for sub_rank, sub in enumerate(subs)
+ ]
parsed_list.extend(parsed_subs)
else:
# Medium images with titles and urls
- subs = cmpt.find_all('div', {'class': ['eA0Zlc', 'vCUuC']})
- parsed_subs = [parse_image_medium(sub, sub_rank + len(parsed_list)) for sub_rank, sub in enumerate(subs)]
+ subs = cmpt.find_all("div", {"class": ["eA0Zlc", "vCUuC"]})
+ parsed_subs = [
+ parse_image_medium(sub, sub_rank + len(parsed_list))
+ for sub_rank, sub in enumerate(subs)
+ ]
parsed_list.extend(parsed_subs)
# Filter empty results
- parsed_list = [p for p in parsed_list if any([p['title'], p['url']])]
-
+ parsed_list = [p for p in parsed_list if any([p["title"], p["url"]])]
+
return parsed_list
+
def parse_image_multimedia(sub, sub_rank=0) -> dict:
"""Parse an images multimedia subcomponent"""
return {
@@ -45,17 +53,18 @@ def parse_image_multimedia(sub, sub_rank=0) -> dict:
"text": None,
}
+
def parse_image_medium(sub, sub_rank=0) -> dict:
"""Parse an images medium subcomponent"""
-
- title_div = get_div(sub, 'a', {'class':'EZAeBe'})
- title = get_text(title_div) if title_div else get_text(sub, 'span', {'class':'Yt787'})
+
+ title_div = get_div(sub, "a", {"class": "EZAeBe"})
+ title = get_text(title_div) if title_div else get_text(sub, "span", {"class": "Yt787"})
url = get_link(sub) if title_div else get_img_url(sub)
if not title:
title = get_img_alt(sub)
if not url:
- url = get_link(sub, attrs={'class':['EZAeBe', 'ddkIM']})
+ url = get_link(sub, attrs={"class": ["EZAeBe", "ddkIM"]})
return {
"type": "images",
@@ -64,36 +73,38 @@ def parse_image_medium(sub, sub_rank=0) -> dict:
"title": title,
"url": url,
"text": None,
- "cite": get_text(sub, 'div', {'class':'ptes9b'})
+ "cite": get_text(sub, "div", {"class": "ptes9b"}),
}
+
def parse_image_small(sub, sub_rank=0) -> dict:
"""Parse an images small subcomponent"""
return {
- "type": "images",
+ "type": "images",
"sub_type": "small",
"sub_rank": sub_rank,
- "title": get_text(sub, 'div', {'class':'xlY4q'}),
+ "title": get_text(sub, "div", {"class": "xlY4q"}),
"url": None,
"text": None,
}
+
def get_img_url(sub):
"""Get image source"""
def get_image_url_from_img_src(sub):
- img_src = sub.find('img').attrs['src']
- if img_src.startswith('data:image'):
+ img_src = sub.find("img").attrs["src"]
+ if img_src.startswith("data:image"):
raise ValueError(f"Data URL: {img_src}")
else:
return img_src
-
+
def get_image_url_from_img_title(sub):
- return sub.find('img').attrs['title']
-
+ return sub.find("img").attrs["title"]
+
def get_image_url_from_attrs(sub):
- return sub.attrs['data-lpage']
+ return sub.attrs["data-lpage"]
func_list = [
get_image_url_from_img_src,
@@ -105,11 +116,11 @@ def get_image_url_from_attrs(sub):
for func in func_list:
try:
url = func(sub)
- if url.startswith('data:image'):
+ if url.startswith("data:image"):
raise ValueError(f"Data URL: {url}")
else:
return url
- except Exception as e:
+ except Exception:
pass
return None
@@ -119,4 +130,4 @@ def get_img_alt(sub):
try:
return f"alt-text: {sub.find('img').attrs['alt']}"
except Exception:
- return None
\ No newline at end of file
+ return None
diff --git a/WebSearcher/component_parsers/knowledge.py b/WebSearcher/component_parsers/knowledge.py
index 93110a2..41f6d9d 100644
--- a/WebSearcher/component_parsers/knowledge.py
+++ b/WebSearcher/component_parsers/knowledge.py
@@ -1,140 +1,149 @@
-from .. import webutils
-from ..models.data import DetailsItem, DetailsList
+from .. import utils
from .general import parse_general_result
def parse_knowledge_panel(cmpt, sub_rank=0) -> list:
"""Parse the Knowledge Box
-
+
Args:
cmpt (bs4 object): a knowledge component
-
+
Returns:
list: Return parsed dictionary in a list
"""
- parsed = {'type':'knowledge', 'sub_rank':sub_rank}
+ parsed = {"type": "knowledge", "sub_rank": sub_rank}
# Get embedded result if it exists
- result = cmpt.find('div', {'class':'rc'})
+ result = cmpt.find("div", {"class": "rc"})
if result:
- parsed['title'] = webutils.get_text(result, 'h3')
- parsed['url'] = webutils.get_link(result)
- parsed['cite'] = webutils.get_text(result, 'cite')
+ parsed["title"] = utils.get_text(result, "h3")
+ parsed["url"] = utils.get_link(result)
+ parsed["cite"] = utils.get_text(result, "cite")
- parsed['text'] = webutils.get_text(cmpt, "div", {"role":"heading", "aria-level":"3"})
+ parsed["text"] = utils.get_text(cmpt, "div", {"role": "heading", "aria-level": "3"})
# Get details
details = {}
- heading = cmpt.find('div', {'role':'heading'})
- details['heading'] = heading.text if heading else None
+ heading = cmpt.find("div", {"role": "heading"})
+ details["heading"] = heading.text if heading else None
- alinks = cmpt.find_all('a')
+ alinks = cmpt.find_all("a")
if alinks:
- urls = DetailsList()
+ urls = []
seen_urls = set()
for a in alinks:
- if 'href' in a.attrs and a['href'] != '#':
- if a['href'] not in seen_urls:
- seen_urls.add(a['href'])
+ if "href" in a.attrs and a["href"] != "#":
+ if a["href"] not in seen_urls:
+ seen_urls.add(a["href"])
urls.append(parse_alink(a))
- details['urls'] = urls.to_dicts()
+ details["urls"] = urls
# Get all text
if cmpt.find("div", {"class": "Fzsovc"}):
- parsed['sub_type'] = 'ai_overview'
- elif cmpt.find("div", {"class":"pxiwBd"}):
- parsed['sub_type'] = 'featured_results'
+ parsed["sub_type"] = "ai_overview"
+ elif cmpt.find("div", {"class": "pxiwBd"}):
+ parsed["sub_type"] = "featured_results"
elif (
- cmpt.find('h2') and cmpt.find('h2').text == 'Featured snippet from the web' or
- cmpt.find('div', {'class':'answered-question'})
+ cmpt.find("h2")
+ and cmpt.find("h2").text == "Featured snippet from the web"
+ or cmpt.find("div", {"class": "answered-question"})
):
- parsed['sub_type'] = 'featured_snippet'
- span = cmpt.find_all(['span'])
- details['text'] = get_text(span) if span else None
+ parsed["sub_type"] = "featured_snippet"
+ span = cmpt.find_all(["span"])
+ details["text"] = get_text(span) if span else None
# General component with no abstract
- if cmpt.find('div', {'class':'g'}):
- parsed_general = parse_general_result(cmpt.find('div', {'class':'g'}))
- parsed_general = {k:v for k,v in parsed_general.items() if k in {'title', 'url', 'cite'}}
+ if cmpt.find("div", {"class": "g"}):
+ parsed_general = parse_general_result(cmpt.find("div", {"class": "g"}))
+ parsed_general = {
+ k: v for k, v in parsed_general.items() if k in {"title", "url", "cite"}
+ }
parsed.update(parsed_general)
+ elif cmpt.find("h2") and cmpt.find("h2").text == "Unit Converter":
+ parsed["sub_type"] = "unit_converter"
+ span = cmpt.find_all(["span"])
+ details["text"] = get_text(span) if span else None
- elif cmpt.find('h2') and cmpt.find('h2').text == 'Unit Converter':
- parsed['sub_type'] = 'unit_converter'
- span = cmpt.find_all(['span'])
- details['text'] = get_text(span) if span else None
-
- elif cmpt.find('h2') and cmpt.find('h2').text == 'Sports Results':
- parsed['sub_type'] = 'sports'
- div = cmpt.find('div', {'class':'SwsxUd'})
- details['text'] = div.text if div else None
+ elif cmpt.find("h2") and cmpt.find("h2").text == "Sports Results":
+ parsed["sub_type"] = "sports"
+ div = cmpt.find("div", {"class": "SwsxUd"})
+ details["text"] = div.text if div else None
- elif cmpt.find('h2') and cmpt.find('h2').text == 'Weather Result':
- parsed['sub_type'] = 'weather'
+ elif cmpt.find("h2") and cmpt.find("h2").text == "Weather Result":
+ parsed["sub_type"] = "weather"
elif (
- cmpt.find('h2') and cmpt.find('h2').text == 'Finance Results' or
- cmpt.find('div', {'id':'knowledge-finance-wholepage__entity-summary'})
+ cmpt.find("h2")
+ and cmpt.find("h2").text == "Finance Results"
+ or cmpt.find("div", {"id": "knowledge-finance-wholepage__entity-summary"})
):
- parsed['sub_type'] = 'finance'
+ parsed["sub_type"] = "finance"
- elif (
- cmpt.find('div', {'data-attrid': 'DictionaryHeader'}) or
- (cmpt.find('div', {'role':'button'}) and cmpt.find('div', {'role':'button'}).text == 'Dictionary')
+ elif cmpt.find("div", {"data-attrid": "DictionaryHeader"}) or (
+ cmpt.find("div", {"role": "button"})
+ and cmpt.find("div", {"role": "button"}).text == "Dictionary"
):
- parsed['sub_type'] = 'dictionary'
- vmod = cmpt.find('div', {'class': 'vmod'})
+ parsed["sub_type"] = "dictionary"
+ vmod = cmpt.find("div", {"class": "vmod"})
if vmod:
- details['text'] = vmod.get_text(' ', strip=True).split('Translate')[0]
+ details["text"] = vmod.get_text(" ", strip=True).split("Translate")[0]
else:
- span_first = cmpt.find('span', {'jsslot':''})
+ span_first = cmpt.find("span", {"jsslot": ""})
if span_first:
- span = span_first.find_all('span')
- details['text'] = get_text(span).split('Translate')[0] if span else None
+ span = span_first.find_all("span")
+ details["text"] = get_text(span).split("Translate")[0] if span else None
elif (
- cmpt.find('h2') and cmpt.find('h2').text == 'Translation Result' or
- cmpt.find('h2') and cmpt.find('h2').text == 'Resultado de traducción'
+ cmpt.find("h2")
+ and cmpt.find("h2").text == "Translation Result"
+ or cmpt.find("h2")
+ and cmpt.find("h2").text == "Resultado de traducción"
):
- parsed['sub_type'] = 'translate'
- span = cmpt.find_all('span')
- details['text'] = get_text(span).split('Community Verified')[0] if span else None
-
- elif cmpt.find('h2') and cmpt.find('h2').text == 'Calculator Result':
- parsed['sub_type'] = 'calculator'
-
- elif details['heading'] == '2020 US election results':
- parsed['sub_type'] = 'election'
- span = cmpt.find_all(['span'])
- details['text'] = get_text(span) if span else None
-
- elif cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'}):
- heading_span = cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'})
- if heading_span and heading_span.text.strip() in ('Things to know', 'Cosas que debes saber'):
- parsed['sub_type'] = 'things_to_know'
- details['heading'] = heading_span.text.strip()
+ parsed["sub_type"] = "translate"
+ span = cmpt.find_all("span")
+ details["text"] = get_text(span).split("Community Verified")[0] if span else None
+
+ elif cmpt.find("h2") and cmpt.find("h2").text == "Calculator Result":
+ parsed["sub_type"] = "calculator"
+
+ elif details["heading"] == "2020 US election results":
+ parsed["sub_type"] = "election"
+ span = cmpt.find_all(["span"])
+ details["text"] = get_text(span) if span else None
+
+ elif cmpt.find("span", {"role": "heading", "class": "IFnjPb"}):
+ heading_span = cmpt.find("span", {"role": "heading", "class": "IFnjPb"})
+ if heading_span and heading_span.text.strip() in (
+ "Things to know",
+ "Cosas que debes saber",
+ ):
+ parsed["sub_type"] = "things_to_know"
+ details["heading"] = heading_span.text.strip()
else:
- parsed['sub_type'] = 'panel'
- div = cmpt.find_all(['span','div','a'], string=True)
- details['text'] = get_text(div) if div else None
+ parsed["sub_type"] = "panel"
+ div = cmpt.find_all(["span", "div", "a"], string=True)
+ details["text"] = get_text(div) if div else None
- text_divs = cmpt.find_all("div", {"class":"sinMW"})
- text_list = [webutils.get_text(div) for div in text_divs]
+ text_divs = cmpt.find_all("div", {"class": "sinMW"})
+ text_list = [utils.get_text(div) for div in text_divs]
parsed["text"] = "<|>".join(text_list) if text_list else None
- parsed["title"] = webutils.get_text(cmpt, "div", {"class": ["ZbhV9d", "HdbW6"]})
- # parsed["title"] = webutils.get_text(cmpt, "div", {"class":"HdbW6"}) if not parsed["title"] else parsed["title"]
+ parsed["title"] = utils.get_text(cmpt, "div", {"class": ["ZbhV9d", "HdbW6"]})
# Get image
- img_div = cmpt.find('div', {'class':'img-brk'})
- details['img_url'] = img_div.find('a')['href'] if img_div else None
- parsed['details'] = details
+ img_div = cmpt.find("div", {"class": "img-brk"})
+ details["img_url"] = img_div.find("a")["href"] if img_div else None
+ details["type"] = "panel"
+ parsed["details"] = details
return [parsed]
+
def get_text(div):
- return '|'.join([d.get_text(separator=' ') for d in div if d.text])
+ return "|".join([d.get_text(separator=" ") for d in div if d.text])
+
def parse_alink(a):
- return DetailsItem(url=a['href'], text=a.get_text('|'))
+ return {"url": a["href"], "text": a.get_text("|")}
diff --git a/WebSearcher/component_parsers/knowledge_rhs.py b/WebSearcher/component_parsers/knowledge_rhs.py
index d5d5173..3c7a311 100644
--- a/WebSearcher/component_parsers/knowledge_rhs.py
+++ b/WebSearcher/component_parsers/knowledge_rhs.py
@@ -1,6 +1,3 @@
-from ..models.data import DetailsItem, DetailsList
-
-
def parse_knowledge_rhs(cmpt, sub_rank=0) -> list:
"""Parse the Right-Hand-Side Knowledge Panel
@@ -11,12 +8,10 @@ def parse_knowledge_rhs(cmpt, sub_rank=0) -> list:
list: Return parsed dictionary in a list
"""
parsed_list = parse_knowledge_rhs_main(cmpt)
- description = cmpt.find('h2', {'class': 'Uo8X3b'})
+ description = cmpt.find("h2", {"class": "Uo8X3b"})
if description and description.parent:
subs = [s for s in description.parent.next_siblings]
- parsed_subs = [
- parse_knowledge_rhs_sub(sub, sub_rank) for sub_rank, sub in enumerate(subs)
- ]
+ parsed_subs = [parse_knowledge_rhs_sub(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
parsed_list.extend(parsed_subs)
return parsed_list
@@ -25,64 +20,59 @@ def parse_knowledge_rhs_main(cmpt, sub_rank=0) -> list:
"""Parse the Right-Hand-Side Knowledge Panel main component"""
parsed = {
- 'type': 'knowledge',
- 'sub_type': 'panel_rhs',
- 'sub_rank': sub_rank,
- 'title': '',
- 'text': '',
- 'url': '',
- 'details': {},
- 'rhs_column': True
+ "type": "knowledge",
+ "sub_type": "panel_rhs",
+ "sub_rank": sub_rank,
+ "title": "",
+ "text": "",
+ "url": "",
+ "details": {},
+ "rhs_column": True,
}
# images
- if cmpt.find('h3') and cmpt.find('h3').text == 'Images':
- sibling = cmpt.find('h3').next_sibling
+ if cmpt.find("h3") and cmpt.find("h3").text == "Images":
+ sibling = cmpt.find("h3").next_sibling
if sibling:
- imgs = sibling.find_all('a')
- parsed['details']['img_urls'] = [
- img['href'] for img in imgs if 'href' in img.attrs
- ]
+ imgs = sibling.find_all("a")
+ parsed["details"]["img_urls"] = [img["href"] for img in imgs if "href" in img.attrs]
# title, subtitle
- if cmpt.find('h2', {'data-attrid': 'title'}):
- parsed['title'] = cmpt.find('h2', {'data-attrid': 'title'}).text
- if cmpt.find('div', {'data-attrid': 'subtitle'}):
- parsed['details']['subtitle'] = cmpt.find(
- 'div', {'data-attrid': 'subtitle'}
- ).text
+ if cmpt.find("h2", {"data-attrid": "title"}):
+ parsed["title"] = cmpt.find("h2", {"data-attrid": "title"}).text
+ if cmpt.find("div", {"data-attrid": "subtitle"}):
+ parsed["details"]["subtitle"] = cmpt.find("div", {"data-attrid": "subtitle"}).text
# description
- description = cmpt.find('h2', {'class': 'Uo8X3b'})
+ description = cmpt.find("h2", {"class": "Uo8X3b"})
if description and description.parent:
- if description.parent.find('span'):
- parsed['text'] = description.parent.find('span').text
- if (
- description.parent.find('a')
- and 'href' in description.parent.find('a').attrs
- ):
- parsed['url'] = description.parent.find('a')['href']
-
- description = cmpt.find('div', {'class': 'kno-rdesc'})
+ if description.parent.find("span"):
+ parsed["text"] = description.parent.find("span").text
+ if description.parent.find("a") and "href" in description.parent.find("a").attrs:
+ parsed["url"] = description.parent.find("a")["href"]
+
+ description = cmpt.find("div", {"class": "kno-rdesc"})
if description:
- parsed['text'] = description.find('span').text
- if description.find('a') and 'href' in description.find('a').attrs:
- parsed['url'] = description.find('a')['href']
+ parsed["text"] = description.find("span").text
+ if description.find("a") and "href" in description.find("a").attrs:
+ parsed["url"] = description.find("a")["href"]
# submenu
if description and description.parent:
- alinks = description.parent.find_all('a')
+ alinks = description.parent.find_all("a")
if description.parent.previous_sibling:
- alinks += description.parent.previous_sibling.find_all('a')
+ alinks += description.parent.previous_sibling.find_all("a")
if len(alinks) > 1: # 1st match has main description
- urls = DetailsList()
+ urls = []
for a in alinks[1:]:
- if 'href' in a.attrs:
+ if "href" in a.attrs:
urls.append(parse_alink(a))
- parsed['details']['urls'] = urls.to_dicts()
+ parsed["details"]["urls"] = urls
- if not len(parsed['details']):
- parsed['details'] = None
+ if parsed["details"]:
+ parsed["details"]["type"] = "panel"
+ else:
+ parsed["details"] = None
return [parsed]
@@ -91,28 +81,28 @@ def parse_knowledge_rhs_sub(sub, sub_rank=0) -> dict:
"""Parse a Right-Hand-Side Knowledge Panel subcomponent"""
parsed = {
- 'type': 'knowledge',
- 'sub_type': 'panel_rhs',
- 'sub_rank': sub_rank + 1,
- 'title': '',
- 'details': None,
- 'rhs_column': True
+ "type": "knowledge",
+ "sub_type": "panel_rhs",
+ "sub_rank": sub_rank + 1,
+ "title": "",
+ "details": None,
+ "rhs_column": True,
}
- heading = sub.find('div', {'role': 'heading'})
+ heading = sub.find("div", {"role": "heading"})
if heading:
- parsed['title'] = heading.get_text(' ')
+ parsed["title"] = heading.get_text(" ")
- alinks = sub.find_all('a')
+ alinks = sub.find_all("a")
if alinks:
- details = DetailsList()
+ items = []
for a in alinks:
- if 'href' in a.attrs:
- details.append(parse_alink(a))
- parsed['details'] = details.to_dicts()
+ if "href" in a.attrs:
+ items.append(parse_alink(a))
+ parsed["details"] = {"type": "hyperlinks", "items": items} if items else None
return parsed
def parse_alink(a):
- return DetailsItem(url=a['href'], text=a.text)
+ return {"url": a["href"], "text": a.text}
diff --git a/WebSearcher/component_parsers/latest_from.py b/WebSearcher/component_parsers/latest_from.py
index cdd1676..8f6363c 100644
--- a/WebSearcher/component_parsers/latest_from.py
+++ b/WebSearcher/component_parsers/latest_from.py
@@ -1,14 +1,15 @@
from .top_stories import parse_top_stories
+
def parse_latest_from(cmpt):
"""Parse a "Latest news" component
These components are the same as Top Stories, but have a different heading.
-
+
Args:
cmpt (bs4 object): A latest from component
-
+
Returns:
dict : parsed result
"""
- return parse_top_stories(cmpt, ctype='latest_from')
+ return parse_top_stories(cmpt, ctype="latest_from")
diff --git a/WebSearcher/component_parsers/local_news.py b/WebSearcher/component_parsers/local_news.py
index 57d03e6..b5dcffc 100644
--- a/WebSearcher/component_parsers/local_news.py
+++ b/WebSearcher/component_parsers/local_news.py
@@ -1,14 +1,15 @@
from .top_stories import parse_top_stories
+
def parse_local_news(cmpt):
"""Parse a "Perspectives & opinions" component
These components are the same as Top Stories, but have a different heading.
-
+
Args:
cmpt (bs4 object): A latest from component
-
+
Returns:
dict : parsed result
"""
- return parse_top_stories(cmpt, ctype='local_news')
+ return parse_top_stories(cmpt, ctype="local_news")
diff --git a/WebSearcher/component_parsers/local_results.py b/WebSearcher/component_parsers/local_results.py
index 372f7f7..bbec1ad 100644
--- a/WebSearcher/component_parsers/local_results.py
+++ b/WebSearcher/component_parsers/local_results.py
@@ -1,5 +1,4 @@
from .. import utils
-from .. import webutils
HEADER_SELECTORS = [
("h2", {"role": "heading"}),
@@ -10,91 +9,95 @@
def parse_local_results(cmpt) -> list:
"""Parse a "Local Results" component
- These components contain an embedded map followed by vertically
- stacked subcomponents for locations. These locations are typically
+ These components contain an embedded map followed by vertically
+ stacked subcomponents for locations. These locations are typically
businesses relevant to the query.
-
+
Args:
cmpt (bs4 object): A local results component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
- subs = cmpt.find_all('div', {'class': 'VkpGBb'})
+ subs = cmpt.find_all("div", {"class": "VkpGBb"})
parsed_list = [parse_local_result(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
if parsed_list:
-
# Set first non-empty header as sub_type (e.g. "Places" -> places)
- header = webutils.get_text_by_selectors(cmpt, HEADER_SELECTORS)
+ header = utils.get_text_by_selectors(cmpt, HEADER_SELECTORS)
if header:
- sub_type = header.lower().replace(" ", "_")
+ header_lower = header.lower()
+ sub_type = (
+ "results_for"
+ if header_lower.startswith("results for")
+ else header_lower.replace(" ", "_")
+ )
for parsed in parsed_list:
- parsed.update({'sub_type':sub_type})
+ parsed.update({"sub_type": sub_type})
return parsed_list
else:
parsed = {
- 'type':'local_results',
- 'sub_rank':0,
- 'text':webutils.get_text(cmpt, 'div', {'class': 'n6tePd'}) # No results message
+ "type": "local_results",
+ "sub_rank": 0,
+ "text": utils.get_text(cmpt, "div", {"class": "n6tePd"}), # No results message
}
return [parsed]
+
def parse_local_result(sub, sub_rank=0) -> dict:
"""Parse a "Local Results" subcomponent
-
+
Args:
sub (bs4 object): A local results subcomponent
-
+
Returns:
dict : parsed subresult
"""
- parsed = {'type':'local_results',
- 'sub_rank':sub_rank}
- parsed['title'] = webutils.get_text(sub, 'div', {'class':'dbg0pd'})
+ parsed = {"type": "local_results", "sub_rank": sub_rank}
+ parsed["title"] = utils.get_text(sub, "div", {"class": "dbg0pd"})
# Extract URL
- links = [a.attrs['href'] for a in sub.find_all('a') if 'href' in a.attrs]
- links_text = [a.text.lower() for a in sub.find_all('a') if 'href' in a.attrs]
+ links = [a.attrs["href"] for a in sub.find_all("a") if "href" in a.attrs]
+ links_text = [a.text.lower() for a in sub.find_all("a") if "href" in a.attrs]
links_dict = dict(zip(links_text, links))
- parsed['url'] = links_dict.get('website', None)
+ parsed["url"] = links_dict.get("website", None)
# Extract text and label
- text = webutils.get_text(sub, 'div', {'class':'rllt__details'}, separator='<|>')
- label = webutils.get_text(sub, "span", {"class":"X0w5lc"})
- parsed['text'] = f"{text} {label} " if label else text
- parsed['details'] = parse_local_details(sub)
+ text = utils.get_text(sub, "div", {"class": "rllt__details"}, separator="<|>")
+ label = utils.get_text(sub, "span", {"class": "X0w5lc"})
+ parsed["text"] = f"{text} {label} " if label else text
+ parsed["details"] = parse_local_details(sub)
return parsed
def parse_local_details(sub) -> dict:
-
- local_details = {}
+
+ local_details = {"type": "ratings"}
# Extract summary details
- detail_div = sub.find('span', {'class':'rllt__details'})
- detail_divs = detail_div.find_all('div') if detail_div else None
+ detail_div = sub.find("span", {"class": "rllt__details"})
+ detail_divs = detail_div.find_all("div") if detail_div else None
# Extract rating and location type
if detail_divs:
rating_div = detail_divs[0]
- rating = rating_div.find('span', {'class':'BTtC6e'})
- if rating:
- local_details['rating'] = float(rating.text)
- n_reviews = utils.get_between_parentheses(rating_div.text).replace(',','')
- local_details['n_reviews'] = int(n_reviews)
- local_details['loc_label'] = rating_div.text.split('·')[-1].strip()
+ rating = rating_div.find("span", {"class": "BTtC6e"})
+ if rating:
+ local_details["rating"] = float(rating.text)
+ n_reviews = utils.get_between_parentheses(rating_div.text).replace(",", "")
+ local_details["n_reviews"] = int(n_reviews)
+ local_details["loc_label"] = rating_div.text.split("·")[-1].strip()
# Extract contact details
if len(detail_divs) > 1:
contact_div = detail_divs[1]
- local_details['contact'] = contact_div.text
+ local_details["contact"] = contact_div.text
# Extract various links
- links = [a.attrs['href'] for a in sub.find_all('a') if 'href' in a.attrs]
- links_text = [a.text.lower() for a in sub.find_all('a') if 'href' in a.attrs]
+ links = [a.attrs["href"] for a in sub.find_all("a") if "href" in a.attrs]
+ links_text = [a.text.lower() for a in sub.find_all("a") if "href" in a.attrs]
links_dict = dict(zip(links_text, links))
local_details.update(links_dict)
- return local_details
\ No newline at end of file
+ return local_details
diff --git a/WebSearcher/component_parsers/locations.py b/WebSearcher/component_parsers/locations.py
index 1016ba7..6891d3e 100644
--- a/WebSearcher/component_parsers/locations.py
+++ b/WebSearcher/component_parsers/locations.py
@@ -1,68 +1,73 @@
"""Parser for locations components (hotels, etc.)"""
import bs4
-from .. import webutils
def parse_locations(cmpt: bs4.element.Tag) -> list:
"""Parse a locations component (e.g. hotel listings)"""
sub_type = classify_locations_sub_type(cmpt)
- if sub_type == 'hotels':
+ if sub_type == "hotels":
return parse_hotels(cmpt)
- return [{'type': 'locations', 'sub_rank': 0, 'error': f'unknown sub_type: {sub_type}'}]
+ return [{"type": "locations", "sub_rank": 0, "error": f"unknown sub_type: {sub_type}"}]
def classify_locations_sub_type(cmpt: bs4.element.Tag) -> str:
"""Classify the sub-type of a locations component"""
- heading = cmpt.find(attrs={'role': 'heading'})
+ heading = cmpt.find(attrs={"role": "heading"})
if heading:
text = heading.get_text(strip=True)
- if 'Hotels' in text or 'Hotel' in text:
- return 'hotels'
+ if "Hotels" in text or "Hotel" in text:
+ return "hotels"
# Check for /travel/ links as fallback
- if cmpt.find('a', href=lambda h: h and '/travel/' in h):
- return 'hotels'
- return 'unknown'
+ if cmpt.find("a", href=lambda h: h and "/travel/" in h):
+ return "hotels"
+ return "unknown"
def parse_hotels(cmpt: bs4.element.Tag) -> list:
"""Parse hotel items from a locations component"""
items = []
- for a in cmpt.find_all('a', href=True):
- href = a.get('href', '')
- if '/travel/' not in href:
+ for a in cmpt.find_all("a", href=True):
+ href = a.get("href", "")
+ if "/travel/" not in href:
continue
- name_div = a.find('div', {'class': 'sxdlOc'}) or a.find('div', {'class': 'BTPx6e'})
+ name_div = a.find("div", {"class": "sxdlOc"}) or a.find("div", {"class": "BTPx6e"})
if not name_div:
continue
items.append(_parse_hotel_item(a, len(items)))
if not items:
- return [{'type': 'locations', 'sub_type': 'hotels', 'sub_rank': 0,
- 'error': 'no hotel items found'}]
+ return [
+ {
+ "type": "locations",
+ "sub_type": "hotels",
+ "sub_rank": 0,
+ "error": "no hotel items found",
+ }
+ ]
return items
def _parse_hotel_item(a: bs4.element.Tag, sub_rank: int) -> dict:
"""Parse a single hotel item from an anchor tag"""
- name_div = a.find('div', {'class': 'sxdlOc'}) or a.find('div', {'class': 'BTPx6e'})
- price_span = a.find('span', {'class': 'sRlU8b'})
- rating_span = a.find('span', {'class': 'yi40Hd'})
- reviews_span = a.find('span', {'class': 'RDApEe'})
- stars_span = a.find('span', {'class': 'NAkmnc'})
- desc_div = a.find('div', {'class': 'S7Ajc'})
+ name_div = a.find("div", {"class": "sxdlOc"}) or a.find("div", {"class": "BTPx6e"})
+ price_span = a.find("span", {"class": "sRlU8b"})
+ rating_span = a.find("span", {"class": "yi40Hd"})
+ reviews_span = a.find("span", {"class": "RDApEe"})
+ stars_span = a.find("span", {"class": "NAkmnc"})
+ desc_div = a.find("div", {"class": "S7Ajc"})
return {
- 'type': 'locations',
- 'sub_type': 'hotels',
- 'sub_rank': sub_rank,
- 'title': name_div.get_text(strip=True) if name_div else None,
- 'url': a.get('href'),
- 'text': desc_div.get_text(strip=True) if desc_div else None,
- 'cite': None,
- 'details': _parse_hotel_details(price_span, rating_span, reviews_span, stars_span),
+ "type": "locations",
+ "sub_type": "hotels",
+ "sub_rank": sub_rank,
+ "title": name_div.get_text(strip=True) if name_div else None,
+ "url": a.get("href"),
+ "text": desc_div.get_text(strip=True) if desc_div else None,
+ "cite": None,
+ "details": _parse_hotel_details(price_span, rating_span, reviews_span, stars_span),
}
@@ -70,11 +75,11 @@ def _parse_hotel_details(price_span, rating_span, reviews_span, stars_span) -> d
"""Extract hotel metadata"""
details = {}
if price_span:
- details['price'] = price_span.get_text(strip=True)
+ details["price"] = price_span.get_text(strip=True)
if rating_span:
- details['rating'] = rating_span.get_text(strip=True)
+ details["rating"] = rating_span.get_text(strip=True)
if reviews_span:
- details['reviews'] = reviews_span.get_text(strip=True)
+ details["reviews"] = reviews_span.get_text(strip=True)
if stars_span:
- details['stars'] = stars_span.get_text(strip=True)
+ details["stars"] = stars_span.get_text(strip=True)
return details if details else None
diff --git a/WebSearcher/component_parsers/map_results.py b/WebSearcher/component_parsers/map_results.py
index 678e431..736d8a5 100644
--- a/WebSearcher/component_parsers/map_results.py
+++ b/WebSearcher/component_parsers/map_results.py
@@ -1,24 +1,26 @@
-from .. import webutils
+from .. import utils
TITLE_SELECTORS = [
- ('div', {'class': 'aiAXrc'}),
+ ("div", {"class": "aiAXrc"}),
]
def parse_map_results(cmpt, sub_rank=0) -> list:
"""Parse a "Map Results" component
- These components contain an embedded map that is not followed by
+ These components contain an embedded map that is not followed by
map results.
-
+
Args:
cmpt (bs4 object): A map results component
-
+
Returns:
dict : parsed result
"""
- return [{
- 'type': 'map_results',
- 'sub_rank': sub_rank,
- 'title': webutils.get_text_by_selectors(cmpt, TITLE_SELECTORS)
- }]
+ return [
+ {
+ "type": "map_results",
+ "sub_rank": sub_rank,
+ "title": utils.get_text_by_selectors(cmpt, TITLE_SELECTORS),
+ }
+ ]
diff --git a/WebSearcher/component_parsers/news_quotes.py b/WebSearcher/component_parsers/news_quotes.py
index 6a3b225..c85274c 100644
--- a/WebSearcher/component_parsers/news_quotes.py
+++ b/WebSearcher/component_parsers/news_quotes.py
@@ -1,53 +1,53 @@
def parse_news_quotes(cmpt) -> list:
"""Parse a "Quotes in the News" component
-
+
Args:
cmpt (bs4 object): a news quotes component
-
+
Returns:
list: list of parsed subcomponent dictionaries
"""
- subs = cmpt.find_all('g-inner-card')
+ subs = cmpt.find_all("g-inner-card")
return [parse_news_quote(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
+
def parse_news_quote(sub, sub_rank=0) -> dict:
"""Parse a "Quotes in the News" subcomponent
-
+
Args:
sub (bs4 object): a news quotes subcomponent
-
+
Returns:
dict : parsed subresult
"""
- parsed = {'type':'news_quotes', 'sub_rank':sub_rank}
+ parsed = {"type": "news_quotes", "sub_rank": sub_rank}
children = list(sub.children)
- if len(children) == 1: # Unfold nested div
+ if len(children) == 1: # Unfold nested div
children = list(children[0].children)
if len(children) == 2:
quote, result = children
- else: # Remove dummy div in middle
+ else: # Remove dummy div in middle
quote, _, result = children
# legacy parsing
- if (len(list(result.children)) == 2):
+ if len(list(result.children)) == 2:
title, meta = result.children
cite, timestamp = meta.children
- parsed['title'] = title.text
- parsed['url'] = title['href']
- parsed['cite'] = cite.text
- parsed['timestamp'] = timestamp.text
+ parsed["title"] = title.text
+ parsed["url"] = title["href"]
+ parsed["cite"] = cite.text
+ parsed["timestamp"] = timestamp.text
else:
all_result = list(result.children)
title = all_result[1]
- cite = all_result[0]
- timestamp = all_result[2] # dates are no relative vs absolute
- parsed['title'] = title.div.text
- parsed['url'] = title['href']
- parsed['cite'] = cite.span.text
- parsed['timestamp'] = timestamp.div.text
-
- parsed['text'] = quote.text
-
- return parsed
+ cite = all_result[0]
+        timestamp = all_result[2]  # dates are now relative vs absolute
+ parsed["title"] = title.div.text
+ parsed["url"] = title["href"]
+ parsed["cite"] = cite.span.text
+ parsed["timestamp"] = timestamp.div.text
+ parsed["text"] = quote.text
+
+ return parsed
diff --git a/WebSearcher/component_parsers/notices.py b/WebSearcher/component_parsers/notices.py
index 6e0e1a6..dc582dc 100644
--- a/WebSearcher/component_parsers/notices.py
+++ b/WebSearcher/component_parsers/notices.py
@@ -1,6 +1,7 @@
-import re
import copy
-from ..webutils import get_text
+import re
+
+from ..utils import get_text
def parse_notices(cmpt) -> list:
@@ -16,23 +17,23 @@ def __init__(self):
"query_edit": {"Showing results for", "Including results for"},
"query_edit_no_results": {"No results found for"},
"query_suggestion": {
- "Did you mean:",
- "Are you looking for:",
- "Search for this instead?",
- "Did you mean to search for:",
- "Search instead for:"
+ "Did you mean:",
+ "Are you looking for:",
+ "Search for this instead?",
+ "Did you mean to search for:",
+ "Search instead for:",
},
"location_choose_area": {"Results for", "Choose area"},
"location_use_precise_location": {"Results for", "Use precise location"},
- "language_tip": {"Tip:", "Learn more about filtering by language"}
+ "language_tip": {"Tip:", "Learn more about filtering by language"},
}
self.parser_dict = {
- 'query_edit': self._parse_query_edit,
- 'query_edit_no_results': self._parse_no_results_replacement,
- 'query_suggestion': self._parse_query_suggestion,
- 'location_choose_area': self._parse_location_choose_area,
- 'location_use_precise_location': self._parse_location_use_precise_location,
- 'language_tip': self._parse_language_tip
+ "query_edit": self._parse_query_edit,
+ "query_edit_no_results": self._parse_no_results_replacement,
+ "query_suggestion": self._parse_query_suggestion,
+ "location_choose_area": self._parse_location_choose_area,
+ "location_use_precise_location": self._parse_location_use_precise_location,
+ "language_tip": self._parse_language_tip,
}
def parse_notices(self, cmpt) -> list:
@@ -46,7 +47,7 @@ def parse_notices(self, cmpt) -> list:
def _classify_sub_type(self, cmpt) -> str:
"""Classify the sub-type of a query notice component"""
cmpt_text = cmpt.text.strip()
- cmpt_text = re.sub(r'\s+', ' ', cmpt_text)
+ cmpt_text = re.sub(r"\s+", " ", cmpt_text)
for sub_type, text_list in self.sub_type_text.items():
if sub_type.startswith("location_"):
@@ -68,48 +69,50 @@ def _parse_sub_type(self, cmpt):
self.parsed = sub_parser(cmpt)
def _package_parsed(self):
- self.parsed_list = [{
- 'type': 'notice',
- 'sub_type': self.sub_type,
- 'sub_rank': 0,
- 'title': self.parsed.get('title', None),
- 'text': self.parsed.get('text', None)
- }]
+ self.parsed_list = [
+ {
+ "type": "notice",
+ "sub_type": self.sub_type,
+ "sub_rank": 0,
+ "title": self.parsed.get("title", None),
+ "text": self.parsed.get("text", None),
+ }
+ ]
def _parse_no_results_replacement(self, cmpt) -> dict:
output = {"title": None, "text": None}
cmpt = copy.copy(cmpt)
- div_title = cmpt.find('div', {'role':'heading', 'aria-level': '2'})
+ div_title = cmpt.find("div", {"role": "heading", "aria-level": "2"})
if div_title:
- output['title'] = div_title.text.strip()
+ output["title"] = div_title.text.strip()
div_title.extract()
div_text = cmpt.find("div", {"class": "card-section"})
if div_text:
- output['text'] = div_text.text.strip()
+ output["text"] = div_text.text.strip()
return output
def _parse_query_edit(self, cmpt) -> dict:
output = {"title": None, "text": None}
- showing_results_span = cmpt.find('span', class_='gL9Hy')
+ showing_results_span = cmpt.find("span", class_="gL9Hy")
if showing_results_span:
- output['title'] = showing_results_span.text.strip()
+ output["title"] = showing_results_span.text.strip()
- modified_query_link = cmpt.find('a', id='fprsl')
+ modified_query_link = cmpt.find("a", id="fprsl")
if modified_query_link:
modified_query = modified_query_link.text.strip()
- output['title'] += f" {modified_query}"
+ output["title"] += f" {modified_query}"
- search_instead_span = cmpt.find('span', class_='spell_orig')
+ search_instead_span = cmpt.find("span", class_="spell_orig")
if search_instead_span:
- output['text'] = search_instead_span.text.strip()
+ output["text"] = search_instead_span.text.strip()
- original_query_link = cmpt.find('a', class_='spell_orig')
+ original_query_link = cmpt.find("a", class_="spell_orig")
if original_query_link:
original_query = original_query_link.text.strip()
- output['text'] += f" {original_query}"
+ output["text"] += f" {original_query}"
return output
def _parse_query_suggestion(self, cmpt) -> dict:
@@ -117,53 +120,54 @@ def _parse_query_suggestion(self, cmpt) -> dict:
# check in div and span with same class
cmpt_checks = {
- cmpt.find('span', class_='gL9Hy'),
- cmpt.find('div', class_='gL9Hy')
+ cmpt.find("span", class_="gL9Hy"),
+ cmpt.find("div", class_="gL9Hy"),
}
for cmpt_check in cmpt_checks:
if cmpt_check:
- output['title'] = cmpt_check.text.strip()
+ output["title"] = cmpt_check.text.strip()
break
- suggestion_links = cmpt.find_all('a', class_='gL9Hy')
- suggested_queries = [get_text(suggestion_link) for suggestion_link in suggestion_links if suggestion_link]
- output['text'] = '<|>'.join(suggested_queries)
+ suggestion_links = cmpt.find_all("a", class_="gL9Hy")
+ suggested_queries = [
+ get_text(suggestion_link) for suggestion_link in suggestion_links if suggestion_link
+ ]
+ output["text"] = "<|>".join(suggested_queries)
return output
def _parse_location_choose_area(self, cmpt) -> dict:
output = {"title": None, "text": None}
-
+
# Extract the main heading
- heading = cmpt.find('div', class_='eKPi4')
+ heading = cmpt.find("div", class_="eKPi4")
if heading:
- results_for_span = heading.find('span', class_='gm7Ysb')
- location_span = heading.find('span', class_='BBwThe')
-
+ results_for_span = heading.find("span", class_="gm7Ysb")
+ location_span = heading.find("span", class_="BBwThe")
+
if results_for_span and location_span:
- output['title'] = f"{results_for_span.text.strip()} {location_span.text.strip()}"
-
+ output["title"] = f"{results_for_span.text.strip()} {location_span.text.strip()}"
+
return output
def _parse_location_use_precise_location(self, cmpt) -> dict:
output = {"title": None, "text": None}
-
+
# Extract the main heading
- heading = cmpt.find('div', class_='eKPi4')
+ heading = cmpt.find("div", class_="eKPi4")
if heading:
- results_for_span = heading.find('span', class_='gm7Ysb')
- location_span = heading.find('span', class_='BBwThe')
-
+ results_for_span = heading.find("span", class_="gm7Ysb")
+ location_span = heading.find("span", class_="BBwThe")
+
if results_for_span and location_span:
- output['title'] = f"{results_for_span.text.strip()} {location_span.text.strip()}"
-
+ output["title"] = f"{results_for_span.text.strip()} {location_span.text.strip()}"
+
return output
def _parse_language_tip(self, cmpt) -> dict:
- output = {"title": None, "text": None}
- title_div = cmpt.find('div', class_='Ww4FFb')
+ output = {"title": None, "text": None}
+ title_div = cmpt.find("div", class_="Ww4FFb")
if title_div:
- output['title'] = re.sub(r'\s+', ' ', title_div.text)
+ output["title"] = re.sub(r"\s+", " ", title_div.text)
return output
-
diff --git a/WebSearcher/component_parsers/people_also_ask.py b/WebSearcher/component_parsers/people_also_ask.py
index e1b1634..dbee54c 100644
--- a/WebSearcher/component_parsers/people_also_ask.py
+++ b/WebSearcher/component_parsers/people_also_ask.py
@@ -1,43 +1,43 @@
-from .. import webutils
+from .. import utils
QUESTION_SELECTORS = [
- ('div', {'class': 'rc'}),
- ('div', {'class': 'yuRUbf'}),
- ('div', {'class': 'iDjcJe'}), # 2023-01-01
- ('div', {'class': 'JlqpRe'}), # 2023-11-16
- ('div', {'class': 'cbphWd'}), # 2021-01-09
+ ("div", {"class": "rc"}),
+ ("div", {"class": "yuRUbf"}),
+ ("div", {"class": "iDjcJe"}), # 2023-01-01
+ ("div", {"class": "JlqpRe"}), # 2023-11-16
+ ("div", {"class": "cbphWd"}), # 2021-01-09
]
def parse_people_also_ask(cmpt, sub_rank=0) -> list:
"""Parse a "People Also Ask" component
- These components contain a list of questions, which drop down to reveal
- summarized information and/or general component results. However, browser
- automation is required to preserve the information in the dropdown, which
+ These components contain a list of questions, which drop down to reveal
+ summarized information and/or general component results. However, browser
+ automation is required to preserve the information in the dropdown, which
only loads after a subcomponent is clicked.
-
+
Args:
cmpt (bs4 object): A "People Also Ask" component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
# questions = cmpt.find_all('g-accordion-expander')
# questions = cmpt.find('section').find_all('div', {'class':'yTrXHe'})
- questions = cmpt.find_all("div", {"class":"related-question-pair"})
+ questions = cmpt.find_all("div", {"class": "related-question-pair"})
parsed_questions = [parse_question(q) for q in questions]
parsed_questions = list(filter(None, parsed_questions))
parsed = {
- 'type': 'people_also_ask',
- 'sub_rank': sub_rank,
- 'text': "<|>".join(parsed_questions) if parsed_questions else None,
- 'details': parsed_questions if parsed_questions else None
+ "type": "people_also_ask",
+ "sub_rank": sub_rank,
+ "text": "<|>".join(parsed_questions) if parsed_questions else None,
+ "details": {"type": "text", "items": parsed_questions} if parsed_questions else None,
}
return [parsed]
def parse_question(question):
"""Parse an individual question in a "People Also Ask" component"""
- return webutils.get_text_by_selectors(question, QUESTION_SELECTORS, strip=True)
\ No newline at end of file
+ return utils.get_text_by_selectors(question, QUESTION_SELECTORS, strip=True)
diff --git a/WebSearcher/component_parsers/perspectives.py b/WebSearcher/component_parsers/perspectives.py
index 70f6d28..948296b 100644
--- a/WebSearcher/component_parsers/perspectives.py
+++ b/WebSearcher/component_parsers/perspectives.py
@@ -1,5 +1,6 @@
from .top_stories import parse_top_stories
+
def parse_perspectives(cmpt):
"""Parse a "Perspectives & opinions" component
@@ -17,7 +18,7 @@ def parse_perspectives(cmpt):
header = cmpt.find("h2", {"role": "heading"})
sub_type = header.text.strip().lower().replace(" ", "_") if header else None
- results = parse_top_stories(cmpt, ctype='perspectives')
+ results = parse_top_stories(cmpt, ctype="perspectives")
for result in results:
- result['sub_type'] = sub_type
+ result["sub_type"] = sub_type
return results
diff --git a/WebSearcher/component_parsers/recent_posts.py b/WebSearcher/component_parsers/recent_posts.py
index ee0a24d..45af069 100644
--- a/WebSearcher/component_parsers/recent_posts.py
+++ b/WebSearcher/component_parsers/recent_posts.py
@@ -1,14 +1,15 @@
from .top_stories import parse_top_stories
+
def parse_recent_posts(cmpt):
"""Parse a "Recent posts" component
These components have a similar carousel as Top Stories and Perspectives.
-
+
Args:
cmpt (bs4 object): A html component
-
+
Returns:
dict : parsed result
"""
- return parse_top_stories(cmpt, ctype='recent_posts')
+ return parse_top_stories(cmpt, ctype="recent_posts")
diff --git a/WebSearcher/component_parsers/scholarly_articles.py b/WebSearcher/component_parsers/scholarly_articles.py
index 082a6a5..eaa442e 100644
--- a/WebSearcher/component_parsers/scholarly_articles.py
+++ b/WebSearcher/component_parsers/scholarly_articles.py
@@ -2,29 +2,30 @@ def parse_scholarly_articles(cmpt) -> list:
"""Parse a scholarly articles component
These components contain links to academic articles via Google Scholar
-
+
Args:
cmpt (bs4 object): A scholarly_articles component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
- subs = cmpt.find_all('tr')[1].find_all('div')
+ subs = cmpt.find_all("tr")[1].find_all("div")
return [parse_article(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
+
def parse_article(sub, sub_rank=0) -> dict:
"""Parse a scholarly articles subcomponent
-
+
Args:
sub (bs4 object): A scholarly articles subcomponent
-
+
Returns:
dict : parsed subresult
"""
- parsed = {'type':'scholarly_articles', 'sub_rank':sub_rank}
- parsed['title'] = sub.text
- if sub.find('a'):
- parsed['url'] = sub.find('a').attrs['href']
- parsed['title'] = sub.find('a').text
- parsed['cite'] = sub.find('span').text.replace(' - \u200e', '')
+ parsed = {"type": "scholarly_articles", "sub_rank": sub_rank}
+ parsed["title"] = sub.text
+ if sub.find("a"):
+ parsed["url"] = sub.find("a").attrs["href"]
+ parsed["title"] = sub.find("a").text
+ parsed["cite"] = sub.find("span").text.replace(" - \u200e", "")
return parsed
diff --git a/WebSearcher/component_parsers/searches_related.py b/WebSearcher/component_parsers/searches_related.py
index 817d1e8..68871a4 100644
--- a/WebSearcher/component_parsers/searches_related.py
+++ b/WebSearcher/component_parsers/searches_related.py
@@ -1,4 +1,4 @@
-from .. import webutils
+from .. import utils
HEADER_SELECTORS = [
("h2", {"role": "heading"}),
@@ -9,43 +9,45 @@
def parse_searches_related(cmpt, sub_rank=0) -> list:
"""Parse a one or two column list of related search queries"""
- parsed = {'type':'searches_related',
- 'sub_rank':sub_rank,
- 'title': None,
- 'url': None}
+ parsed = {
+ "type": "searches_related",
+ "sub_rank": sub_rank,
+ "title": None,
+ "url": None,
+ }
# Set first non-empty header as sub_type (e.g. "Additional searches" -> additional_searches)
- header = webutils.get_text_by_selectors(cmpt, HEADER_SELECTORS)
- parsed['sub_type'] = header.lower().replace(" ", "_") if header else None
+ header = utils.get_text_by_selectors(cmpt, HEADER_SELECTORS)
+ parsed["sub_type"] = header.lower().replace(" ", "_") if header else None
output_list = []
# Classic search query suggestions
- subs = webutils.find_all_divs(cmpt, 'a', {'class':'k8XOCe'})
+ subs = utils.find_all_divs(cmpt, "a", {"class": "k8XOCe"})
text_list = [sub.text.strip() for sub in subs]
output_list.extend(filter(None, text_list))
# Curated list (e.g. song names)
- subs = webutils.find_all_divs(cmpt, 'div', {'class':'EASEnb'})
+ subs = utils.find_all_divs(cmpt, "div", {"class": "EASEnb"})
text_list = [sub.text.strip() for sub in subs]
output_list.extend(filter(None, text_list))
# Other list types
- subs = webutils.find_all_divs(cmpt, 'div', {'role':'listitem'})
+ subs = utils.find_all_divs(cmpt, "div", {"role": "listitem"})
text_list = [sub.text.strip() for sub in subs]
output_list.extend(filter(None, text_list))
-
+
# Accordion list
- if cmpt.find('explore-desktop-accordion'):
- subs = webutils.find_all_divs(cmpt, 'div', {'class':'JXa4nd'})
- text_list = [webutils.get_text(sub, 'div', {'class':'Cx1ZMc'}) for sub in subs]
+ if cmpt.find("explore-desktop-accordion"):
+ subs = utils.find_all_divs(cmpt, "div", {"class": "JXa4nd"})
+ text_list = [utils.get_text(sub, "div", {"class": "Cx1ZMc"}) for sub in subs]
output_list.extend(filter(None, text_list))
- if cmpt.find('div', {"class":'brs_col'}):
- subs = webutils.find_all_divs(cmpt, 'a')
+ if cmpt.find("div", {"class": "brs_col"}):
+ subs = utils.find_all_divs(cmpt, "a")
link_text = [sub.text.strip() for sub in subs]
output_list.extend(filter(None, link_text))
- parsed['text'] = '<|>'.join(output_list)
- parsed['details'] = output_list
+ parsed["text"] = "<|>".join(output_list)
+ parsed["details"] = {"type": "text", "items": output_list} if output_list else None
return [parsed]
diff --git a/WebSearcher/component_parsers/shopping_ads.py b/WebSearcher/component_parsers/shopping_ads.py
index 77041f0..d4c2d31 100644
--- a/WebSearcher/component_parsers/shopping_ads.py
+++ b/WebSearcher/component_parsers/shopping_ads.py
@@ -12,73 +12,73 @@ def parse_shopping_ads(cmpt) -> list:
"""
# Sponsored hotel carousel (atvcap)
- cards = cmpt.find_all(attrs={'role': 'listitem'})
+ cards = cmpt.find_all(attrs={"role": "listitem"})
if cards:
return [_parse_sponsored_hotel(card, i) for i, card in enumerate(cards)]
# Standard product listing ads (pla-unit)
- subs = cmpt.find_all('div', {'class':'mnr-c pla-unit'})
+ subs = cmpt.find_all("div", {"class": "mnr-c pla-unit"})
return [_parse_pla_unit(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
def _parse_pla_unit(sub, sub_rank=0) -> dict:
"""Parse a standard product listing ad"""
- parsed = {'type': 'shopping_ads', 'sub_rank': sub_rank}
+ parsed = {"type": "shopping_ads", "sub_rank": sub_rank}
- card = sub.find('a', {'class': 'clickable-card'})
+ card = sub.find("a", {"class": "clickable-card"})
if card:
- parsed['url'] = card['href']
- parsed['title'] = card['aria-label']
+ parsed["url"] = card["href"]
+ parsed["title"] = card["aria-label"]
return parsed
def _parse_sponsored_hotel(card, sub_rank=0) -> dict:
"""Parse a sponsored hotel card from the atvcap carousel"""
- name_div = card.find('div', {'class': 'KZYtMc'})
- price_div = card.find('div', {'class': 'XO8mWb'})
- source_div = card.find('div', {'class': 'sX5I1c'})
- rating_span = card.find('span', {'class': 'Y0A0hc'})
+ name_div = card.find("div", {"class": "KZYtMc"})
+ price_div = card.find("div", {"class": "XO8mWb"})
+ source_div = card.find("div", {"class": "sX5I1c"})
+ rating_span = card.find("span", {"class": "Y0A0hc"})
# Star level and amenity are in role=text spans
- role_texts = card.find_all('span', {'role': 'text', 'class': 'cHaqb'})
+ role_texts = card.find_all("span", {"role": "text", "class": "cHaqb"})
stars = role_texts[0].get_text(strip=True) if len(role_texts) > 0 else None
amenity = role_texts[1].get_text(strip=True) if len(role_texts) > 1 else None
# Rating text includes review count (e.g. "3.2(345)")
rating_text = rating_span.get_text(strip=True) if rating_span else None
rating = None
- reviews = None
+ n_reviews = None
if rating_text:
- paren = rating_text.find('(')
+ paren = rating_text.find("(")
if paren > 0:
rating = rating_text[:paren]
- reviews = rating_text[paren:]
+ n_reviews = rating_text[paren:].strip("()")
else:
rating = rating_text
- details = {}
+ details = {"type": "ratings"}
if price_div:
- details['price'] = price_div.get_text(strip=True)
+ details["price"] = price_div.get_text(strip=True)
if source_div:
- details['source'] = source_div.get_text(strip=True)
- if rating:
- details['rating'] = rating
- if reviews:
- details['reviews'] = reviews
+ details["source"] = source_div.get_text(strip=True)
+ if rating is not None:
+ details["rating"] = rating
+ if n_reviews is not None:
+ details["n_reviews"] = n_reviews
if stars:
- details['stars'] = stars
+ details["stars"] = stars
if amenity:
- details['amenity'] = amenity
+ details["amenity"] = amenity
return {
- 'type': 'shopping_ads',
- 'sub_type': 'hotels',
- 'sub_rank': sub_rank,
- 'title': name_div.get_text(strip=True) if name_div else None,
- 'url': card.find('a', href=True).get('href') if card.find('a', href=True) else None,
- 'text': None,
- 'cite': None,
- 'details': details if details else None,
+ "type": "shopping_ads",
+ "sub_type": "hotels",
+ "sub_rank": sub_rank,
+ "title": name_div.get_text(strip=True) if name_div else None,
+ "url": card.find("a", href=True).get("href") if card.find("a", href=True) else None,
+ "text": None,
+ "cite": None,
+ "details": details if details else None,
}
diff --git a/WebSearcher/component_parsers/short_videos.py b/WebSearcher/component_parsers/short_videos.py
index c6a2552..a0df85e 100644
--- a/WebSearcher/component_parsers/short_videos.py
+++ b/WebSearcher/component_parsers/short_videos.py
@@ -1,4 +1,4 @@
-from .. import webutils
+from .. import utils
def parse_short_videos(cmpt) -> list:
@@ -12,25 +12,24 @@ def parse_short_videos(cmpt) -> list:
"""
# Filter to full card links (with heading), skip thumbnail-only duplicates
cards = [
- a for a in cmpt.find_all('a', {'class': 'rIRoqf'})
- if a.find('div', {'role': 'heading'})
+ a for a in cmpt.find_all("a", {"class": "rIRoqf"}) if a.find("div", {"role": "heading"})
]
if not cards:
- return [{'type': 'short_videos', 'sub_rank': 0}]
+ return [{"type": "short_videos", "sub_rank": 0}]
parsed_list = []
for i, card in enumerate(cards):
parsed = {
- 'type': 'short_videos',
- 'sub_rank': i,
- 'url': card.get('href'),
- 'title': webutils.get_text(card, 'div', {'role': 'heading'}),
+ "type": "short_videos",
+ "sub_rank": i,
+ "url": card.get("href"),
+ "title": utils.get_text(card, "div", {"role": "heading"}),
}
# Get source (YouTube, TikTok, etc.) and duration
- cite = webutils.get_text(card, 'span', {'class': 'xFMKFe'})
+ cite = utils.get_text(card, "span", {"class": "xFMKFe"})
if cite:
- parsed['cite'] = cite
+ parsed["cite"] = cite
parsed_list.append(parsed)
diff --git a/WebSearcher/component_parsers/top_image_carousel.py b/WebSearcher/component_parsers/top_image_carousel.py
index fcf37f7..2d688b4 100644
--- a/WebSearcher/component_parsers/top_image_carousel.py
+++ b/WebSearcher/component_parsers/top_image_carousel.py
@@ -1,5 +1,4 @@
-from .. import webutils
-from ..models.data import DetailsItem, DetailsList
+from .. import utils
def parse_top_image_carousel(cmpt, sub_rank=0) -> list:
@@ -11,28 +10,29 @@ def parse_top_image_carousel(cmpt, sub_rank=0) -> list:
Returns:
list: list of parsed subcomponent dictionaries
"""
-
- parsed = {'type':'top_image_carousel', 'sub_rank':sub_rank}
- title = cmpt.find_all('span', {'class': 'Wkr6U'})
+ parsed = {"type": "top_image_carousel", "sub_rank": sub_rank}
+
+ title = cmpt.find_all("span", {"class": "Wkr6U"})
if title:
- parsed['title'] = '|'.join([t.text for t in title])
- parsed['url'] = webutils.get_link(cmpt)
+ parsed["title"] = "|".join([t.text for t in title])
+ parsed["url"] = utils.get_link(cmpt)
- images = cmpt.find('div', {'role':'list'})
+ images = cmpt.find("div", {"role": "list"})
if images:
alinks = images.children
else:
- alinks = cmpt.find('g-scrolling-carousel').find_all('a')
-
- details = DetailsList()
+ alinks = cmpt.find("g-scrolling-carousel").find_all("a")
+
+ items = []
for a in alinks:
- if 'href' in a.attrs or 'data-url' in a.attrs:
- details.append(parse_alink(a))
- parsed['details'] = details.to_dicts()
+ if "href" in a.attrs or "data-url" in a.attrs:
+ items.append(parse_alink(a))
+ parsed["details"] = {"type": "hyperlinks", "items": items} if items else None
return [parsed]
+
def parse_alink(a):
- url = a.attrs.get('href') or a.attrs.get('data-url', '')
- return DetailsItem(url=url, text=a.get_text('|'))
+ url = a.attrs.get("href") or a.attrs.get("data-url", "")
+ return {"url": url, "text": a.get_text("|")}
diff --git a/WebSearcher/component_parsers/top_stories.py b/WebSearcher/component_parsers/top_stories.py
index f54b692..b9efafd 100644
--- a/WebSearcher/component_parsers/top_stories.py
+++ b/WebSearcher/component_parsers/top_stories.py
@@ -1,93 +1,98 @@
-from ..webutils import find_all_divs, find_children, get_text, get_text_by_selectors, get_link
+from ..utils import (
+ find_all_divs,
+ find_children,
+ get_link,
+ get_text,
+ get_text_by_selectors,
+)
TITLE_SELECTORS = [
- ('div', {'class': 'n0jPhd'}), # Top Stories
- ('div', {'class': 'eAaXgc'}), # Perspectives
+ ("div", {"class": "n0jPhd"}), # Top Stories
+ ("div", {"class": "eAaXgc"}), # Perspectives
]
-def parse_top_stories(cmpt, ctype='top_stories') -> list:
+def parse_top_stories(cmpt, ctype="top_stories") -> list:
"""Parse a "Top Stories" component
These components contain links to news articles and often feature an image.
Sometimes the subcomponents are stacked vertically, and sometimes they are
- stacked horizontally and feature a larger image, resembling the video
+ stacked horizontally and feature a larger image, resembling the video
component.
-
+
Args:
cmpt (bs4 object): A "Top Stories" component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
-
+
# Known div structures
divs = []
- divs.extend(find_all_divs(cmpt, 'g-inner-card')) # Top Stories
- divs.extend(find_children(cmpt, 'div', {'class': 'qmv19b'})) # Top Stories
- divs.extend(find_all_divs(cmpt, 'div', {'class': 'IJl0Z'})) # Top Stories
- divs.extend(find_all_divs(cmpt, 'div', {'class': 'JJZKK'})) # Perspectives
+ divs.extend(find_all_divs(cmpt, "g-inner-card")) # Top Stories
+ divs.extend(find_children(cmpt, "div", {"class": "qmv19b"})) # Top Stories
+ divs.extend(find_all_divs(cmpt, "div", {"class": "IJl0Z"})) # Top Stories
+ divs.extend(find_all_divs(cmpt, "div", {"class": "JJZKK"})) # Perspectives
if not divs:
# This will double count if divs already found above
- link_divs = find_all_divs(cmpt, 'a', {'class': 'WlydOe'}) # Top Stories - Vertical
- divs.extend([div.parent for div in link_divs])
+ link_divs = find_all_divs(cmpt, "a", {"class": "WlydOe"}) # Top Stories - Vertical
+ divs.extend([div.parent for div in link_divs])
divs = list(filter(None, divs))
if divs:
return [parse_top_story(div, ctype, i) for i, div in enumerate(divs)]
else:
- return [{'type': ctype, 'sub_rank': 0, 'error': 'No subcomponents found'}]
+ return [{"type": ctype, "sub_rank": 0, "error": "No subcomponents found"}]
def parse_top_story(sub, ctype, sub_rank=0) -> dict:
"""Parse "Top Stories" component"""
parsed = {
- 'type': ctype,
- 'sub_rank': sub_rank,
- 'title': get_text_by_selectors(sub, TITLE_SELECTORS),
- 'url': get_link(sub, key='href'),
- 'text': get_text(sub, "div", {'class': "GI74Re"}),
- 'cite': get_cite(sub)
+ "type": ctype,
+ "sub_rank": sub_rank,
+ "title": get_text_by_selectors(sub, TITLE_SELECTORS),
+ "url": get_link(sub, key="href"),
+ "text": get_text(sub, "div", {"class": "GI74Re"}),
+ "cite": get_cite(sub),
}
return parsed
-
def get_cite(sub):
- div_cite = sub.find("div", {'class': 'Dx69l'})
- img_cite = sub.find('g-img', {'class': 'sL0zmc'})
- span_cite = sub.find('g-img', {'class': 'QyR1Ze'})
-
+ div_cite = sub.find("div", {"class": "Dx69l"})
+ img_cite = sub.find("g-img", {"class": "sL0zmc"})
+ span_cite = sub.find("g-img", {"class": "QyR1Ze"})
+
if div_cite:
# Perspectives
- cite = get_text(sub, 'div', {'class': 'Dx69l'})
+ cite = get_text(sub, "div", {"class": "Dx69l"})
elif img_cite:
# Top Stories (image cite, get "alt" image text)
- img = img_cite.find('img')
- if img and 'alt' in img.attrs:
- cite = img.attrs['alt']
+ img = img_cite.find("img")
+ if img and "alt" in img.attrs:
+ cite = img.attrs["alt"]
elif span_cite:
- cite = get_text(sub, 'span')
+ cite = get_text(sub, "span")
else:
- cite = get_text(sub, 'cite')
+ cite = get_text(sub, "cite")
return cite
def get_top_story_details(sub):
# Extract component specific details
details = {}
- details['img_url'] = get_img_url(sub)
- details['orient'] = 'v' if sub.find('span', {'class':'uaCsqe'}) else 'h'
- details['live_stamp'] = True if sub.find('span', {'class':'EugGe'}) else False
+ details["img_url"] = get_img_url(sub)
+ details["orient"] = "v" if sub.find("span", {"class": "uaCsqe"}) else "h"
+ details["live_stamp"] = True if sub.find("span", {"class": "EugGe"}) else False
return details
def get_img_url(soup):
- """Extract image source"""
- img = soup.find('img')
- if img and 'data-src' in img.attrs:
- return img.attrs['data-src']
+ """Extract image source"""
+ img = soup.find("img")
+ if img and "data-src" in img.attrs:
+ return img.attrs["data-src"]
diff --git a/WebSearcher/component_parsers/twitter_cards.py b/WebSearcher/component_parsers/twitter_cards.py
index a014b5a..db7e7a8 100644
--- a/WebSearcher/component_parsers/twitter_cards.py
+++ b/WebSearcher/component_parsers/twitter_cards.py
@@ -1,4 +1,4 @@
-from .. import webutils
+from .. import utils
def parse_twitter_cards(cmpt) -> list:
@@ -10,57 +10,53 @@ def parse_twitter_cards(cmpt) -> list:
Args:
cmpt (bs4 object): A twitter cards component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
# header, carousel = list(cmpt.find('g-section-with-header').children)[:2]
parsed_header = parse_twitter_header(cmpt)
- carousel = cmpt.find('g-scrolling-carousel')
- subs = carousel.find_all('g-inner-card')
+ carousel = cmpt.find("g-scrolling-carousel")
+ subs = carousel.find_all("g-inner-card")
parsed_cards = [parse_twitter_card(sub, sub_rank + 1) for sub_rank, sub in enumerate(subs)]
parsed_list = [parsed_header] + parsed_cards
return parsed_list
-def parse_twitter_header(cmpt, sub_rank:int = 0) -> dict:
+def parse_twitter_header(cmpt, sub_rank: int = 0) -> dict:
"""Parse a Twitter header from the main component"""
- parsed = {"type": "twitter_cards",
- "sub_type": "header",
- "sub_rank": sub_rank}
- element_current = cmpt.find('g-link')
- element_legacy = cmpt.find('h3', {'class':'r'})
- if cmpt.find('h3'):
+ parsed = {"type": "twitter_cards", "sub_type": "header", "sub_rank": sub_rank}
+ element_current = cmpt.find("g-link")
+ element_legacy = cmpt.find("h3", {"class": "r"})
+ if cmpt.find("h3"):
if element_legacy:
- parsed['url'] = webutils.url_unquote(element_legacy.get('href', ''))
- parsed['title'] = webutils.get_text(element_legacy, 'a')
+ parsed["url"] = utils.url_unquote(element_legacy.get("href", ""))
+ parsed["title"] = utils.get_text(element_legacy, "a")
elif element_current:
- parsed['url'] = webutils.url_unquote(webutils.get_link(element_current))
- parsed['title'] = webutils.get_text(element_current)
+ parsed["url"] = utils.url_unquote(utils.get_link(element_current))
+ parsed["title"] = utils.get_text(element_current)
elif element_current:
- parsed['url'] = webutils.get_link(element_current)
- parsed['title'] = webutils.get_text(element_current)
- parsed["cite"] = webutils.get_text(cmpt, 'cite')
+ parsed["url"] = utils.get_link(element_current)
+ parsed["title"] = utils.get_text(element_current)
+ parsed["cite"] = utils.get_text(cmpt, "cite")
return parsed
-def parse_twitter_card(sub, sub_rank:int = 0) -> dict:
+def parse_twitter_card(sub, sub_rank: int = 0) -> dict:
"""Parse a Twitter card from a subcomponent"""
- parsed = {"type": "twitter_cards",
- "sub_type": "card",
- "sub_rank": sub_rank}
+ parsed = {"type": "twitter_cards", "sub_type": "card", "sub_rank": sub_rank}
# Tweet account
- title = sub.find('g-link')
- parsed["title"] = webutils.get_text(title, 'a') if title else None
+ title = sub.find("g-link")
+ parsed["title"] = utils.get_text(title, "a") if title else None
# Bottom div containing details
- div = sub.find('div', {'class':'Brgz0'})
+ div = sub.find("div", {"class": "Brgz0"})
if div:
- url = webutils.get_link(div)
- parsed["url"] = webutils.url_unquote(url) if url else None
- parsed["text"] = webutils.get_text(div, 'div', {'class':'xcQxib'})
- parsed["cite"] = webutils.get_text(div, 'div', {'class':'rmxqbe'})
+ url = utils.get_link(div)
+ parsed["url"] = utils.url_unquote(url) if url else None
+ parsed["text"] = utils.get_text(div, "div", {"class": "xcQxib"})
+ parsed["cite"] = utils.get_text(div, "div", {"class": "rmxqbe"})
return parsed
diff --git a/WebSearcher/component_parsers/twitter_result.py b/WebSearcher/component_parsers/twitter_result.py
index e2814c7..36890a7 100644
--- a/WebSearcher/component_parsers/twitter_result.py
+++ b/WebSearcher/component_parsers/twitter_result.py
@@ -1,36 +1,38 @@
-from ..webutils import get_text, get_link
+from ..utils import get_link, get_text
+
def parse_twitter_result(cmpt, sub_rank=0) -> list:
"""Parse a Twitter single result component
These components look like general components, but link to a Twitter account
and sometimes have a tweet in the summary.
-
+
Args:
cmpt (bs4 object): A twitter cards component
-
+
Returns:
list : list of parsed subcomponent dictionaries
- """
- parsed = {'type':'twitter_result', 'sub_rank':sub_rank}
+ """
+ parsed = {"type": "twitter_result", "sub_rank": sub_rank}
# Header
- header = cmpt.find('div', {'class':'DOqJne'})
+ header = cmpt.find("div", {"class": "DOqJne"})
if header:
- title = header.find('g-link')
+ title = header.find("g-link")
# Get title
if title:
- parsed['title'] = title.find('a').text
- parsed['url'] = title.find('a')['href']
+ parsed["title"] = title.find("a").text
+ parsed["url"] = title.find("a")["href"]
# Get citation
- cite = header.find('cite')
+ cite = header.find("cite")
if cite:
- parsed['cite'] = cite.text
-
+ parsed["cite"] = cite.text
+
# Get snippet text, timestamp, and tweet url
- body, timestamp_url = cmpt.find('div', {'class':'tw-res'}).children
- parsed['text'] = get_text(body)
- parsed['timestamp'] = get_text(timestamp_url, 'span')
- parsed['details'] = get_link(timestamp_url)
- return [parsed]
\ No newline at end of file
+ body, timestamp_url = cmpt.find("div", {"class": "tw-res"}).children
+ parsed["text"] = get_text(body)
+ parsed["timestamp"] = get_text(timestamp_url, "span")
+ tweet_url = get_link(timestamp_url)
+ parsed["details"] = {"type": "tweet", "url": tweet_url} if tweet_url else None
+ return [parsed]
diff --git a/WebSearcher/component_parsers/videos.py b/WebSearcher/component_parsers/videos.py
index cd374d3..d6e46fd 100644
--- a/WebSearcher/component_parsers/videos.py
+++ b/WebSearcher/component_parsers/videos.py
@@ -1,4 +1,4 @@
-""" Parsers for video components
+"""Parsers for video components
Changelog
2024-05-08: added find_all for divs with class 'VibNM'
@@ -7,16 +7,17 @@
"""
-from .. import webutils
+from .. import utils
+
def parse_videos(cmpt) -> list:
"""Parse a videos component
These components contain links to videos, frequently to YouTube.
-
+
Args:
cmpt (bs4 object): A videos component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
@@ -24,14 +25,14 @@ def parse_videos(cmpt) -> list:
# Get known div structures
divs = []
name_attrs = [
- ({'name':'g-inner-card'}, 'unspecified-0'),
- ({'name':'div', 'attrs':{'class':'VibNM'}}, 'unspecified-1'),
- ({'name':'div', 'attrs':{'class':'mLmaBd'}}, 'unspecified-2'),
- ({'name':'div', 'attrs':{'class':'RzdJxc'}}, 'unspecified-3'),
- ({'name':'div', 'attrs':{'class':'sHEJob'}}, 'vertical'),
+ ({"name": "g-inner-card"}, "unspecified-0"),
+ ({"name": "div", "attrs": {"class": "VibNM"}}, "unspecified-1"),
+ ({"name": "div", "attrs": {"class": "mLmaBd"}}, "unspecified-2"),
+ ({"name": "div", "attrs": {"class": "RzdJxc"}}, "unspecified-3"),
+ ({"name": "div", "attrs": {"class": "sHEJob"}}, "vertical"),
]
for kwargs, sub_type in name_attrs:
- divs = webutils.find_all_divs(cmpt, **kwargs)
+ divs = utils.find_all_divs(cmpt, **kwargs)
if divs:
break
divs = list(filter(None, divs))
@@ -39,79 +40,80 @@ def parse_videos(cmpt) -> list:
if divs:
return [parse_video(div, sub_type, i) for i, div in enumerate(divs)]
else:
- return [{'type': 'videos', 'sub_rank': 0, 'error': 'No subcomponents found'}]
+ return [{"type": "videos", "sub_rank": 0, "error": "No subcomponents found"}]
def parse_video(sub, sub_type: str, sub_rank=0) -> dict:
"""Parse a videos subcomponent
-
+
Args:
sub (bs4 object): A video subcomponent
-
+
Returns:
dict : parsed subresult
"""
parsed = {
- 'type': 'videos',
- 'sub_type': sub_type,
- 'sub_rank': sub_rank,
- 'url': get_url(sub),
- 'title': webutils.get_text(sub, 'div', {'role':'heading'}),
- 'text': webutils.get_text(sub, 'div', {'class':'MjS0Lc'}),
+ "type": "videos",
+ "sub_type": sub_type,
+ "sub_rank": sub_rank,
+ "url": get_url(sub),
+ "title": utils.get_text(sub, "div", {"role": "heading"}),
+ "text": utils.get_text(sub, "div", {"class": "MjS0Lc"}),
}
- details = sub.find_all('div', {'class':'MjS0Lc'})
+ details = sub.find_all("div", {"class": "MjS0Lc"})
if details:
text_div, citetime_div = details
- parsed['text'] = text_div.text if text_div else None
+ parsed["text"] = text_div.text if text_div else None
if citetime_div:
# Sometimes there is only a cite
- citetime = citetime_div.find('div',{'class':'zECGdd'})
+ citetime = citetime_div.find("div", {"class": "zECGdd"})
citetime = list(citetime.children)
if len(citetime) == 2:
- cite, timestamp = citetime
- parsed['cite'] = cite.text
+ cite, timestamp = citetime
+ parsed["cite"] = cite.text
# parsed.timestamp = timestamp.replace(' - ', '')
else:
- parsed['cite'] = citetime[0].text
- elif sub.find('span', {'class':'ocUPSd'}):
- parsed['cite'] = sub.text
+ parsed["cite"] = citetime[0].text
+ elif sub.find("span", {"class": "ocUPSd"}):
+ parsed["cite"] = sub.text
# parsed.timestamp = get_div_text(sub, {'class':'rjmdhd'})
elif sub.find("cite"):
- parsed['cite'] = webutils.get_text(sub, "cite")
- # parsed.timestamp = webutils.get_text(sub, "div", {'class':'hMJ0yc'})
+ parsed["cite"] = utils.get_text(sub, "cite")
+ # parsed.timestamp = utils.get_text(sub, "div", {'class':'hMJ0yc'})
return parsed
+
def get_url(sub):
"""Get video URL by filtering for non-hash links"""
- all_urls = sub.find_all('a')
+ all_urls = sub.find_all("a")
for url in all_urls:
- if "href" in url.attrs and not url.attrs['href'].startswith('#'):
+ if "href" in url.attrs and not url.attrs["href"].startswith("#"):
return url.attrs["href"]
return None
def get_div_text(soup, details):
- div = soup.find('div', details)
+ div = soup.find("div", details)
return div.text if div else None
def get_img_url(soup):
- """Extract image source"""
- img = soup.find('img')
- if img and 'data-src' in img.attrs:
- return img.attrs['data-src']
+ """Extract image source"""
+ img = soup.find("img")
+ if img and "data-src" in img.attrs:
+ return img.attrs["data-src"]
# Deprecated: images now have the same link, key moments are rare or gone
# def get_video_details(sub):
-# parsed['details'] = {}
+# parsed['details'] = {}
# parsed['details']['img_url'] = get_img_url(sub)
# # Check for "key moments" in video
# key_moments_div = sub.find('div', {'class':'AvBz0e'})
# parsed['details']['key_moments'] = True if key_moments_div else False
-# return parsed
\ No newline at end of file
+# return parsed
diff --git a/WebSearcher/component_parsers/view_more_news.py b/WebSearcher/component_parsers/view_more_news.py
index 9ad1f4a..9be4748 100644
--- a/WebSearcher/component_parsers/view_more_news.py
+++ b/WebSearcher/component_parsers/view_more_news.py
@@ -1,50 +1,52 @@
def parse_view_more_news(cmpt) -> list:
"""Parse a "View more news" component
- These components are highly similar to the vertically stacked Top Stories
+ These components are highly similar to the vertically stacked Top Stories
and Latest from results, but include a news icon in the top left.
-
+
Args:
cmpt (bs4 object): A local results component
-
+
Returns:
list : list of parsed subcomponent dictionaries
"""
- if cmpt.find('div', {'class':'qmv19b'}):
- subs = cmpt.find('div', {'class':'qmv19b'}).children
- elif cmpt.find('g-scrolling-carousel'):
- subs = cmpt.find('g-scrolling-carousel').find_all('g-inner-card')
+ if cmpt.find("div", {"class": "qmv19b"}):
+ subs = cmpt.find("div", {"class": "qmv19b"}).children
+ elif cmpt.find("g-scrolling-carousel"):
+ subs = cmpt.find("g-scrolling-carousel").find_all("g-inner-card")
return [parse_sub(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
+
def parse_sub(sub, sub_rank=0) -> dict:
"""Parse a "View more news" subcomponent
-
+
Args:
sub (bs4 object): A view more news subcomponent
-
+
Returns:
dict : parsed subresult
"""
- parsed = {'type':'view_more_news', 'sub_rank':sub_rank}
- parsed['title'] = sub.find('div', {'class': 'jBgGLd'}).text
- parsed['url'] = sub.find('a').attrs['href']
+ parsed = {"type": "view_more_news", "sub_rank": sub_rank}
+ parsed["title"] = sub.find("div", {"class": "jBgGLd"}).text
+ parsed["url"] = sub.find("a").attrs["href"]
- if sub.find('span', {'class':'wqg8ad'}):
- parsed['cite'] = sub.find('span', {'class':'wqg8ad'}).text
- elif sub.find('cite'):
- parsed['cite'] = sub.find('cite').text
+ if sub.find("span", {"class": "wqg8ad"}):
+ parsed["cite"] = sub.find("span", {"class": "wqg8ad"}).text
+ elif sub.find("cite"):
+ parsed["cite"] = sub.find("cite").text
- if sub.find('span', {'class':'FGlSad'}):
- parsed['timestamp'] = sub.find('span', {'class':'FGlSad'}).text
- elif sub.find('span', {'class':'f'}):
- parsed['timestamp'] = sub.find('span', {'class':'f'}).text
+ if sub.find("span", {"class": "FGlSad"}):
+ parsed["timestamp"] = sub.find("span", {"class": "FGlSad"}).text
+ elif sub.find("span", {"class": "f"}):
+ parsed["timestamp"] = sub.find("span", {"class": "f"}).text
- parsed['img_url'] = get_img_url(sub)
+ parsed["img_url"] = get_img_url(sub)
return parsed
+
def get_img_url(soup):
- """Extract image source"""
- img = soup.find('img')
- if img and 'data-src' in img.attrs:
- return img.attrs['data-src']
\ No newline at end of file
+ """Extract image source"""
+ img = soup.find("img")
+ if img and "data-src" in img.attrs:
+ return img.attrs["data-src"]
diff --git a/WebSearcher/components.py b/WebSearcher/components.py
index 30334d1..c32bf03 100644
--- a/WebSearcher/components.py
+++ b/WebSearcher/components.py
@@ -1,24 +1,32 @@
-from .models.data import BaseResult
-from .classifiers import ClassifyMain, ClassifyFooter, ClassifyHeaderComponent
-from .component_parsers import main_parser_dict, footer_parser_dict, header_parser_dict
-from .component_parsers import parse_unknown, parse_not_implemented
+import traceback
+from collections.abc import Callable
+
+import bs4
+
+from .classifiers import ClassifyFooter, ClassifyHeaderComponent, ClassifyMain
+from .component_parsers import (
+ footer_parser_dict,
+ header_parser_dict,
+ main_parser_dict,
+ parse_not_implemented,
+ parse_unknown,
+)
from .logger import Logger
+from .models.data import BaseResult
+
log = Logger().start(__name__)
-import bs4
-import traceback
-from collections.abc import Callable
class Component:
"""A SERP component extracted from HTML"""
def __init__(
- self,
- elem: bs4.element.Tag,
- section: str = "unknown",
- type: str = "unknown",
- cmpt_rank: int | None = None
- ) -> None:
+ self,
+ elem: bs4.element.Tag,
+ section: str = "unknown",
+ type: str = "unknown",
+ cmpt_rank: int | None = None,
+ ) -> None:
"""Initialize a Component
Args:
@@ -41,8 +49,8 @@ def to_dict(self) -> dict:
return self.__dict__
def get_metadata(self, key_filter=["section", "cmpt_rank"]) -> dict:
- return {k:v for k,v in self.to_dict().items() if k in key_filter}
-
+ return {k: v for k, v in self.to_dict().items() if k in key_filter}
+
def classify_component(self, classify_type_func: Callable | None = None):
"""Classify the component type"""
if classify_type_func:
@@ -85,14 +93,14 @@ def run_parser(self, parser_func: Callable) -> list:
return parsed_list
def parse_component(self, parser_type_func: Callable | None = None):
-
+
if not self.type:
parsed_list = self.create_parsed_list_error("null component type")
else:
# Select and run parser
parser_func = self.select_parser(parser_type_func)
parsed_list = self.run_parser(parser_func)
-
+
# Check parsed_list
if not isinstance(parsed_list, (list, dict)):
parsed_list = self.create_parsed_list_error("parser output not list or dict")
@@ -108,12 +116,14 @@ def create_parsed_list_error(self, error_msg: str, is_exception: bool = False) -
error_traceback = traceback.format_exc()
else:
log.debug(f"{error_msg}: {self.cmpt_rank} | {self.section} | {self.type}")
- return [{
- "type": self.type,
- "cmpt_rank": self.cmpt_rank,
- "text": self.elem.get_text("<|>", strip=True),
- "error": error_msg if not is_exception else f"{error_msg}: {error_traceback}"
- }]
+ return [
+ {
+ "type": self.type,
+ "cmpt_rank": self.cmpt_rank,
+ "text": self.elem.get_text("<|>", strip=True),
+ "error": error_msg if not is_exception else f"{error_msg}: {error_traceback}",
+ }
+ ]
def add_parsed_result_list(self, parsed_result_list):
for parsed_result in parsed_result_list:
@@ -126,7 +136,7 @@ def add_parsed_result(self, parsed_result):
def export_results(self):
"""Export the list of results"""
- result_metadata = {"section":self.section, "cmpt_rank":self.cmpt_rank}
+ result_metadata = {"section": self.section, "cmpt_rank": self.cmpt_rank}
results_list = [{**result_metadata, **result} for result in self.result_list]
return results_list
@@ -138,10 +148,11 @@ def __init__(self):
self.serp_rank_counter = 0
def __iter__(self):
- for component in self.components:
- yield component
+ yield from self.components
- def add_component(self, elem:bs4.element.Tag, section="unknown", type="unknown", cmpt_rank=None):
+ def add_component(
+ self, elem: bs4.element.Tag, section="unknown", type="unknown", cmpt_rank=None
+ ):
"""Add a component to the list of components"""
cmpt_rank = self.cmpt_rank_counter if not cmpt_rank else cmpt_rank
component = Component(elem, section, type, cmpt_rank)
@@ -165,7 +176,7 @@ def reorder_by_dom_position(self, dom_positions):
def _effective_pos(cmpt):
rng = dom_positions.get(id(cmpt.elem))
if rng is None:
- return float('inf')
+ return float("inf")
start, end = rng
# Check if this element's range contains another component
@@ -179,14 +190,14 @@ def _effective_pos(cmpt):
if start <= o_start <= end:
# cmpt.elem is an ancestor of other.elem — find
# first direct child positioned after the nested subtree
- best = float('inf')
+ best = float("inf")
for ch in cmpt.elem.children:
- if not hasattr(ch, 'name') or not ch.name:
+ if not hasattr(ch, "name") or not ch.name:
continue
ch_rng = dom_positions.get(id(ch))
if ch_rng and ch_rng[0] > o_end and ch_rng[0] < best:
best = ch_rng[0]
- if best != float('inf'):
+ if best != float("inf"):
return best
return start
diff --git a/WebSearcher/extractors/__init__.py b/WebSearcher/extractors/__init__.py
index bbafe9b..2162a78 100644
--- a/WebSearcher/extractors/__init__.py
+++ b/WebSearcher/extractors/__init__.py
@@ -1,13 +1,15 @@
import bs4
+
+from .. import logger
from ..components import ComponentList
-from .extractor_rhs import ExtractorRightHandSide
-from .extractor_main import ExtractorMain
-from .extractor_header import ExtractorHeader
from .extractor_footer import ExtractorFooter
+from .extractor_header import ExtractorHeader
+from .extractor_main import ExtractorMain
+from .extractor_rhs import ExtractorRightHandSide
-from .. import logger
log = logger.Logger().start(__name__)
+
class Extractor:
def __init__(self, soup: bs4.BeautifulSoup):
self.soup = soup
@@ -18,7 +20,7 @@ def __init__(self, soup: bs4.BeautifulSoup):
self.footer_handler = ExtractorFooter(self.soup, self.components)
def extract_components(self):
- log.debug(f"Extracting Components {'-'*50}")
+ log.debug(f"Extracting Components {'-' * 50}")
dom_positions = self._get_dom_positions(self.soup)
self.rhs_handler.extract()
self.header_handler.extract()
diff --git a/WebSearcher/extractors/extractor_footer.py b/WebSearcher/extractors/extractor_footer.py
index abe2530..06a6c39 100644
--- a/WebSearcher/extractors/extractor_footer.py
+++ b/WebSearcher/extractors/extractor_footer.py
@@ -1,9 +1,10 @@
import bs4
-from .. import webutils
-from .. import logger
+
+from .. import logger, utils
log = logger.Logger().start(__name__)
+
class ExtractorFooter:
def __init__(self, soup: bs4.BeautifulSoup, components):
self.soup = soup
@@ -12,16 +13,14 @@ def __init__(self, soup: bs4.BeautifulSoup, components):
def extract(self):
"""Extract the footer section of the SERP"""
- footer_div = self.soup.find('div', {'id':'botstuff'})
+ footer_div = self.soup.find("div", {"id": "botstuff"})
footer_component_list = []
if footer_div:
- footer_component_divs = webutils.find_all_divs(
- self.soup, 'div', {'id': ['bres', 'brs']}
- )
+ footer_component_divs = utils.find_all_divs(self.soup, "div", {"id": ["bres", "brs"]})
if footer_component_divs:
for footer_component_div in footer_component_divs:
- expanded_divs = webutils.find_all_divs(
+ expanded_divs = utils.find_all_divs(
footer_component_div, "div", {"class": "MjjYud"}
)
if expanded_divs and len(expanded_divs) > 1:
@@ -29,25 +28,24 @@ def extract(self):
else:
footer_component_list.append(footer_component_div)
- omitted_notice = self.soup.find('div', {'class':'ClPXac'})
+ omitted_notice = self.soup.find("div", {"class": "ClPXac"})
if omitted_notice:
footer_component_list.append(omitted_notice)
footer_component_list = [
- e for e in footer_component_list
- if not ExtractorFooter.is_hidden_footer(e)
+ e for e in footer_component_list if not ExtractorFooter.is_hidden_footer(e)
]
- log.debug(f'footer_components: {len(footer_component_list)}')
+ log.debug(f"footer_components: {len(footer_component_list)}")
for footer_component in footer_component_list:
- self.components.add_component(footer_component, section='footer')
+ self.components.add_component(footer_component, section="footer")
@staticmethod
def is_hidden_footer(element):
"""Filter out hidden footer components (no visual presence)."""
conditions = [
- element.find("span", {"class":"oUAcPd"}),
+ element.find("span", {"class": "oUAcPd"}),
element.find("div", {"class": "RTaUke"}),
element.find("div", {"class": "KJ7Tg"}),
]
- return any(conditions)
\ No newline at end of file
+ return any(conditions)
diff --git a/WebSearcher/extractors/extractor_header.py b/WebSearcher/extractors/extractor_header.py
index 7955d04..95e6a0f 100644
--- a/WebSearcher/extractors/extractor_header.py
+++ b/WebSearcher/extractors/extractor_header.py
@@ -1,9 +1,10 @@
import bs4
-from .. import webutils
-from .. import logger
+
+from .. import logger, utils
log = logger.Logger().start(__name__)
+
class ExtractorHeader:
def __init__(self, soup: bs4.BeautifulSoup, components):
self.soup = soup
@@ -17,17 +18,17 @@ def extract(self):
def extract_appbar(self):
"""Extract the top bar section, often a carousel of images or other suggestions."""
- appbar = self.soup.find('div', {'id':'appbar'})
+ appbar = self.soup.find("div", {"id": "appbar"})
if appbar:
- has_img = appbar.find(lambda tag: tag.has_attr('src') and not tag.has_attr('data-src'))
- if appbar.find('g-scrolling-carousel') and has_img:
- self.components.add_component(appbar, section='header', type='top_image_carousel')
+ has_img = appbar.find(lambda tag: tag.has_attr("src") and not tag.has_attr("data-src"))
+ if appbar.find("g-scrolling-carousel") and has_img:
+ self.components.add_component(appbar, section="header", type="top_image_carousel")
self.exists = True
def extract_notices(self):
"""Append notices to the components list at the end."""
- notices = webutils.find_all_divs(self.soup, "div", {"id": "oFNiHe"}, filter_empty=True)
+ notices = utils.find_all_divs(self.soup, "div", {"id": "oFNiHe"}, filter_empty=True)
if notices:
self.exists = True
for notice in notices:
- self.components.add_component(notice, section="header", type="notice")
\ No newline at end of file
+ self.components.add_component(notice, section="header", type="notice")
diff --git a/WebSearcher/extractors/extractor_main.py b/WebSearcher/extractors/extractor_main.py
index 6496120..1ecd92d 100644
--- a/WebSearcher/extractors/extractor_main.py
+++ b/WebSearcher/extractors/extractor_main.py
@@ -1,9 +1,11 @@
import bs4
-from .. import webutils
+
+from .. import utils
from ..logger import Logger
log = Logger().start(__name__)
+
class ExtractorMain:
def __init__(self, soup: bs4.BeautifulSoup, components):
self.soup = soup
@@ -26,7 +28,7 @@ def __init__(self, soup: bs4.BeautifulSoup, components):
"standard": self.extract_from_standard,
"top-bars": self.extract_from_top_bar,
"left-bar": self.extract_from_left_bar,
- "no-rso": self.extract_from_no_rso
+ "no-rso": self.extract_from_no_rso,
}
def extract(self):
@@ -42,28 +44,26 @@ def get_layout(self):
# Layout soup subsets
layout_divs = {}
- layout_divs['rso'] = self.soup.find('div', {'id':'rso'})
- layout_divs['left-bar'] = self.soup.find('div', {'class': 'OeVqAd'})
-
- rcnt = self.soup.find('div', {'id':'rcnt'})
- layout_divs['top-bars'] = webutils.find_all_divs(rcnt, 'div', {'class': ['XqFnDf', 'M8OgIe']})
-
+ layout_divs["rso"] = self.soup.find("div", {"id": "rso"})
+ layout_divs["left-bar"] = self.soup.find("div", {"class": "OeVqAd"})
+
+ rcnt = self.soup.find("div", {"id": "rcnt"})
+ layout_divs["top-bars"] = utils.find_all_divs(rcnt, "div", {"class": ["XqFnDf", "M8OgIe"]})
+
# Layout classifications
layouts = {}
- layouts['top-bars'] = bool(layout_divs['top-bars'])
- layouts['left-bar'] = bool(layout_divs['left-bar'])
- layouts['standard'] = (
- bool(layout_divs['rso']) &
- (not layouts['top-bars']) &
- (not layouts['left-bar'])
+ layouts["top-bars"] = bool(layout_divs["top-bars"])
+ layouts["left-bar"] = bool(layout_divs["left-bar"])
+ layouts["standard"] = (
+ bool(layout_divs["rso"]) & (not layouts["top-bars"]) & (not layouts["left-bar"])
)
- layouts['no-rso'] = not bool(layout_divs['rso'])
+ layouts["no-rso"] = not bool(layout_divs["rso"])
- if layouts['top-bars'] and bool(layout_divs['rso']) and not layouts['left-bar']:
- layout_label = 'standard'
- else:
+ if layouts["top-bars"] and bool(layout_divs["rso"]) and not layouts["left-bar"]:
+ layout_label = "standard"
+ else:
# Get layout label
- label_matches = [k for k,v in layouts.items() if v]
+ label_matches = [k for k, v in layouts.items() if v]
layout_label = label_matches[0] if label_matches else None
# Set layout details
@@ -74,156 +74,190 @@ def get_layout(self):
def _ads_top_carousel(self):
"""Extract sponsored carousel ads (e.g. Sponsored hotels via atvcap)"""
- ads = self.soup.find('div', {'id':'atvcap'})
- if ads and webutils.get_text(ads):
+ ads = self.soup.find("div", {"id": "atvcap"})
+ if ads and utils.get_text(ads):
ads.extract()
- self.components.add_component(ads, section='main', type='shopping_ads')
+ self.components.add_component(ads, section="main", type="shopping_ads")
def _ads_top(self):
- ads = self.soup.find('div', {'id':'tads'})
- if ads and webutils.get_text(ads):
+ ads = self.soup.find("div", {"id": "tads"})
+ if ads and utils.get_text(ads):
ads.extract()
- self.components.add_component(ads, section='main', type='ad')
+ self.components.add_component(ads, section="main", type="ad")
- def _main_column(self, drop_tags: set = {'script', 'style', None}):
+ def _main_column(self, drop_tags: set = {"script", "style", None}):
try:
extractor = self.layout_extractors[self.layout_label]
except KeyError:
raise ValueError(f"no extractor for layout_label: {self.layout_label}")
column = extractor(drop_tags)
- column = webutils.filter_empty_divs(column)
+ column = utils.filter_empty_divs(column)
for c in column:
if ExtractorMain.is_valid(c):
- self.components.add_component(c, section='main')
+ self.components.add_component(c, section="main")
def _ads_bottom(self):
- ads = self.soup.find('div', {'id':'tadsb'})
- if ads and webutils.get_text(ads):
+ ads = self.soup.find("div", {"id": "tadsb"})
+ if ads and utils.get_text(ads):
ads.extract()
- self.components.add_component(ads, section='main', type='ad')
+ self.components.add_component(ads, section="main", type="ad")
- def extract_from_standard(self, drop_tags:set={}) -> list:
+ def extract_from_standard(self, drop_tags: set = {}) -> list:
- rso_div = self.layout_divs['rso']
+ rso_div = self.layout_divs["rso"]
standard_layouts = {
- "standard-0": (rso_div.find('div', {'id':'kp-wp-tab-overview'}), 'div', [{'class':'TzHB6b'}, {'class':'A6K0A'}]),
- "standard-1": (rso_div.find('div', {'id':'kp-wp-tab-cont-Songs', 'role':'tabpanel'}), None, None),
- "standard-2": (rso_div.find('div', {'id':'kp-wp-tab-SportsStandings'}), None, None),
- "standard-4": (rso_div.find('div', {'id':'kp-wp-tab-AIRFARES'}), 'div', [{'class':'A6K0A'}]),
+ "standard-0": (
+ rso_div.find("div", {"id": "kp-wp-tab-overview"}),
+ "div",
+ [{"class": "TzHB6b"}, {"class": "A6K0A"}],
+ ),
+ "standard-1": (
+ rso_div.find("div", {"id": "kp-wp-tab-cont-Songs", "role": "tabpanel"}),
+ None,
+ None,
+ ),
+ "standard-2": (
+ rso_div.find("div", {"id": "kp-wp-tab-SportsStandings"}),
+ None,
+ None,
+ ),
+ "standard-4": (
+ rso_div.find("div", {"id": "kp-wp-tab-AIRFARES"}),
+ "div",
+ [{"class": "A6K0A"}],
+ ),
}
- for layout_name, (layout_div, check_tag, check_attrs_list) in standard_layouts.items():
+ for layout_name, (
+ layout_div,
+ check_tag,
+ check_attrs_list,
+ ) in standard_layouts.items():
if layout_div:
if check_tag:
- for check_attrs in (check_attrs_list if isinstance(check_attrs_list, list) else [check_attrs_list]):
+ for check_attrs in (
+ check_attrs_list
+ if isinstance(check_attrs_list, list)
+ else [check_attrs_list]
+ ):
if layout_div.find_all(check_tag, check_attrs):
return self._extract_from_standard_sub_type(layout_name)
elif layout_div.find_all("div"):
return self._extract_from_standard_sub_type(layout_name)
- top_divs = ExtractorMain.extract_top_divs(self.layout_divs['top-bars']) or []
+ top_divs = ExtractorMain.extract_top_divs(self.layout_divs["top-bars"]) or []
col = ExtractorMain.extract_children(rso_div, drop_tags)
col = top_divs + col
col = [c for c in col if ExtractorMain.is_valid(c)]
if not col:
- self.layout_label = 'standard-3'
+ self.layout_label = "standard-3"
log.debug(f"main_layout: {self.layout_label} (update)")
- divs = rso_div.find_all('div', {'id':'kp-wp-tab-overview'})
- col = sum([d.find_all('div', {'class':'TzHB6b'}) for d in divs], [])
+ divs = rso_div.find_all("div", {"id": "kp-wp-tab-overview"})
+ col = sum([d.find_all("div", {"class": "TzHB6b"}) for d in divs], [])
if not col:
- col = sum([d.find_all('div', {'class':'A6K0A'}, recursive=False) for d in divs], [])
+ col = sum(
+ [d.find_all("div", {"class": "A6K0A"}, recursive=False) for d in divs],
+ [],
+ )
return col
- def _extract_from_standard_sub_type(self, sub_type:str = "") -> list:
-
+ def _extract_from_standard_sub_type(self, sub_type: str = "") -> list:
+
self.layout_label = sub_type
- rso_div = self.layout_divs['rso']
+ rso_div = self.layout_divs["rso"]
log.debug(f"main_layout: {self.layout_label} (update)")
-
+
if self.layout_label == "standard-0":
column = []
- top_divs = ExtractorMain.extract_top_divs(self.layout_divs['top-bars']) or []
- tab_overview = rso_div.find('div', {'id':'kp-wp-tab-overview'})
- main_divs = tab_overview.find_all('div', {'class':'TzHB6b'}, recursive=False) if tab_overview else []
+ top_divs = ExtractorMain.extract_top_divs(self.layout_divs["top-bars"]) or []
+ tab_overview = rso_div.find("div", {"id": "kp-wp-tab-overview"})
+ main_divs = (
+ tab_overview.find_all("div", {"class": "TzHB6b"}, recursive=False)
+ if tab_overview
+ else []
+ )
if not main_divs and tab_overview:
- main_divs = tab_overview.find_all('div', {'class':'A6K0A'}, recursive=False)
+ main_divs = tab_overview.find_all("div", {"class": "A6K0A"}, recursive=False)
column.extend(top_divs)
column.extend(main_divs)
log.debug(f"main_components: {len(column):,}")
return column
-
+
if self.layout_label == "standard-1":
column = []
- top_divs = ExtractorMain.extract_top_divs(self.layout_divs['top-bars']) or []
- main_divs = rso_div.find('div', {'id':'kp-wp-tab-Songs'}).children or []
+ top_divs = ExtractorMain.extract_top_divs(self.layout_divs["top-bars"]) or []
+ main_divs = rso_div.find("div", {"id": "kp-wp-tab-Songs"}).children or []
column.extend(top_divs)
column.extend(main_divs)
- column = [div for div in column if div.name not in {'script', 'style'}]
- column = webutils.filter_empty_divs(column)
+ column = [div for div in column if div.name not in {"script", "style"}]
+ column = utils.filter_empty_divs(column)
return column
-
+
if self.layout_label == "standard-2":
column = []
- top_divs = ExtractorMain.extract_top_divs(self.layout_divs['top-bars']) or []
- main_divs = rso_div.find('div', {'id':'kp-wp-tab-SportsStandings'}).children or []
+ top_divs = ExtractorMain.extract_top_divs(self.layout_divs["top-bars"]) or []
+ main_divs = rso_div.find("div", {"id": "kp-wp-tab-SportsStandings"}).children or []
column.extend(top_divs)
column.extend(main_divs)
- column = [div for div in column if div.name not in {'script', 'style'}]
- column = webutils.filter_empty_divs(column)
+ column = [div for div in column if div.name not in {"script", "style"}]
+ column = utils.filter_empty_divs(column)
return column
if self.layout_label == "standard-4":
column = []
- top_divs = ExtractorMain.extract_top_divs(self.layout_divs['top-bars']) or []
- tab_airfares = rso_div.find('div', {'id':'kp-wp-tab-AIRFARES'})
- main_divs = tab_airfares.find_all('div', {'class':'A6K0A'}, recursive=False) if tab_airfares else []
+ top_divs = ExtractorMain.extract_top_divs(self.layout_divs["top-bars"]) or []
+ tab_airfares = rso_div.find("div", {"id": "kp-wp-tab-AIRFARES"})
+ main_divs = (
+ tab_airfares.find_all("div", {"class": "A6K0A"}, recursive=False)
+ if tab_airfares
+ else []
+ )
column.extend(top_divs)
column.extend(main_divs)
return column
-
- def extract_from_top_bar(self, drop_tags:set={}) -> list:
+ def extract_from_top_bar(self, drop_tags: set = {}) -> list:
out = []
- tops = ExtractorMain.extract_top_divs(self.layout_divs['top-bars'])
+ tops = ExtractorMain.extract_top_divs(self.layout_divs["top-bars"])
out.extend(tops)
div_classes = [
- 'cUnQKe', # people also ask
- 'g', # general
- 'Lv2Cle', # images-medium
- 'oIk2Cb', # searches_related
- 'Ww4FFb', # discussions_and_forums
- 'vtSz8d', # videos
- 'uVMCKf', # videos
+ "cUnQKe", # people also ask
+ "g", # general
+ "Lv2Cle", # images-medium
+ "oIk2Cb", # searches_related
+ "Ww4FFb", # discussions_and_forums
+ "vtSz8d", # videos
+ "uVMCKf", # videos
]
- rso_divs = self.layout_divs['rso'].find_all('div', attrs={'class':div_classes})
+ rso_divs = self.layout_divs["rso"].find_all("div", attrs={"class": div_classes})
if rso_divs:
- self.layout_label = 'top-bars-divs'
+ self.layout_label = "top-bars-divs"
col = [div for div in rso_divs if div.name not in drop_tags]
else:
- self.layout_label = 'top-bars-children'
- col = ExtractorMain.extract_children(self.layout_divs['rso'], drop_tags)
+ self.layout_label = "top-bars-children"
+ col = ExtractorMain.extract_children(self.layout_divs["rso"], drop_tags)
log.debug(f"main_layout: {self.layout_label} (update)")
out.extend(col)
return out
@staticmethod
- def extract_top_divs(soup, drop_tags:set={}) -> list:
+ def extract_top_divs(soup, drop_tags: set = {}) -> list:
out = []
for tb in soup:
- if webutils.check_dict_value(tb.attrs, "class", ["M8OgIe"]):
- kd = webutils.find_all_divs(tb, "div", {"jscontroller":["qTdDb","OWrb3e"]})
+ if utils.check_dict_value(tb.attrs, "class", ["M8OgIe"]):
+ kd = utils.find_all_divs(tb, "div", {"jscontroller": ["qTdDb", "OWrb3e"]})
if kd:
out.extend(kd)
else:
# Extract non-ad children (tvcap/tads handled by _ads_top)
for ch in tb.children:
- if not hasattr(ch, 'name') or not ch.name:
+ if not hasattr(ch, "name") or not ch.name:
continue
- if ch.find('div', {'id': 'tvcap'}) or ch.find('div', {'id': 'tads'}):
+ if ch.find("div", {"id": "tvcap"}) or ch.find("div", {"id": "tads"}):
continue
- if ch.name == 'h1':
+ if ch.name == "h1":
continue
out.append(ch)
elif ExtractorMain.is_dictionary_header(tb):
@@ -235,52 +269,59 @@ def extract_top_divs(soup, drop_tags:set={}) -> list:
@staticmethod
def is_dictionary_header(elem) -> bool:
"""Check if element is a dictionary word header (redundant with definitions)"""
- return (
- bool(elem.find('div', {'class': 'kp-wholepage-osrp'})) and
- bool(elem.find('div', {'data-attrid': 'title'}))
+ return bool(elem.find("div", {"class": "kp-wholepage-osrp"})) and bool(
+ elem.find("div", {"data-attrid": "title"})
)
- def extract_from_left_bar(self, drop_tags:set={}) -> list:
- return self.soup.find_all('div', {'class':'TzHB6b'})
+ def extract_from_left_bar(self, drop_tags: set = {}) -> list:
+ return self.soup.find_all("div", {"class": "TzHB6b"})
- def extract_from_no_rso(self, drop_tags:set={}) -> list:
- out=[]; sec1=self.soup.find_all('div', {'class':'UDZeY OTFaAf'})
+ def extract_from_no_rso(self, drop_tags: set = {}) -> list:
+ out = []
+ sec1 = self.soup.find_all("div", {"class": "UDZeY OTFaAf"})
for div in sec1:
- if div.find('h2') and div.find('h2').text=="Twitter Results":
- out.append(div.find('div').parent)
- elif div.find('g-section-with-header'):
- out.append(div.find('g-section-with-header').parent)
- elif div.find('g-more-link'):
+ if div.find("h2") and div.find("h2").text == "Twitter Results":
+ out.append(div.find("div").parent)
+ elif div.find("g-section-with-header"):
+ out.append(div.find("g-section-with-header").parent)
+ elif div.find("g-more-link"):
out.append(div)
- elif div.find('div',{'class':'oIk2Cb'}):
+ elif div.find("div", {"class": "oIk2Cb"}):
out.append(div)
else:
- out.extend(div.find_all('div',{'class':'g'}))
- sec2=self.soup.find('div',{'class':'WvKfwe a3spGf'})
+ out.extend(div.find_all("div", {"class": "g"}))
+ sec2 = self.soup.find("div", {"class": "WvKfwe a3spGf"})
if sec2:
out.extend(sec2.children)
return [c for c in out if c.name not in drop_tags]
@staticmethod
- def extract_children(soup, drop_tags:set={}) -> list:
- cts=[]
+ def extract_children(soup, drop_tags: set = {}) -> list:
+ cts = []
for ch in soup.children:
- if ch.name in drop_tags: continue
- if not ch.attrs: cts.extend(ch.contents)
- else: cts.append(ch)
+ if ch.name in drop_tags:
+ continue
+ if not ch.attrs:
+ cts.extend(ch.contents)
+ else:
+ cts.append(ch)
return cts
@staticmethod
def is_valid(c) -> bool:
- if not c: return False
- bad = {"Main results","Twitter Results",""}
- if c.text in bad: return False
+ if not c:
+ return False
+ bad = {"Main results", "Twitter Results", ""}
+ if c.text in bad:
+ return False
# Skip bottom ads wrapper (extracted separately)
- if c.find('div', {'id': 'tadsb'}): return False
+ if c.find("div", {"id": "tadsb"}):
+ return False
# hidden survey
cond = [
- c.find('promo-throttler'),
- webutils.check_dict_value(c.attrs,"class",["ULSxyf"]) if 'attrs' in c else False,
+ c.find("promo-throttler"),
+ utils.check_dict_value(c.attrs, "class", ["ULSxyf"]) if "attrs" in c else False,
]
- if all(cond): return False
- return True
\ No newline at end of file
+ if all(cond):
+ return False
+ return True
diff --git a/WebSearcher/extractors/extractor_rhs.py b/WebSearcher/extractors/extractor_rhs.py
index 4fc013d..d8b3857 100644
--- a/WebSearcher/extractors/extractor_rhs.py
+++ b/WebSearcher/extractors/extractor_rhs.py
@@ -1,9 +1,10 @@
import bs4
-from .. import webutils
-from .. import logger
+
+from .. import logger, utils
log = logger.Logger().start(__name__)
+
class ExtractorRightHandSide:
def __init__(self, soup: bs4.BeautifulSoup, components):
self.soup = soup
@@ -12,18 +13,14 @@ def __init__(self, soup: bs4.BeautifulSoup, components):
def extract(self):
"""Extract the RHS Knowledge Panel, if present."""
- rhs_div = self.soup.find('div', {'id': 'rhs'})
+ rhs_div = self.soup.find("div", {"id": "rhs"})
if not rhs_div:
return
rhs_div.extract()
layout, div = self._get_layout(rhs_div)
if layout:
log.debug(f"rhs_layout: {layout}")
- self.rhs = {
- "elem": div,
- "section": "rhs",
- "type": "knowledge_rhs"
- }
+ self.rhs = {"elem": div, "section": "rhs", "type": "knowledge_rhs"}
else:
log.debug("no rhs_layout")
@@ -36,8 +33,12 @@ def append(self):
def _get_layout(self, rhs_div):
rhs_layouts = {
- 'rhs_complementary': rhs_div if webutils.check_dict_value(rhs_div.attrs, "role", "complementary") else None,
- 'rhs_knowledge': rhs_div.find('div', {'class': ['kp-wholepage', 'knowledge-panel', 'TzHB6b']})
+ "rhs_complementary": rhs_div
+ if utils.check_dict_value(rhs_div.attrs, "role", "complementary")
+ else None,
+ "rhs_knowledge": rhs_div.find(
+ "div", {"class": ["kp-wholepage", "knowledge-panel", "TzHB6b"]}
+ ),
}
found = next((name for name, node in rhs_layouts.items() if node), None)
- return (found, rhs_div) if found else (None, rhs_div)
\ No newline at end of file
+ return (found, rhs_div) if found else (None, rhs_div)
diff --git a/WebSearcher/feature_extractor.py b/WebSearcher/feature_extractor.py
index db3d985..47d2bd3 100644
--- a/WebSearcher/feature_extractor.py
+++ b/WebSearcher/feature_extractor.py
@@ -1,7 +1,8 @@
import re
+
from bs4 import BeautifulSoup
-from . import webutils
+from . import utils
from .models.features import SERPFeatures
@@ -22,7 +23,7 @@ def extract_features(html_or_soup: str | BeautifulSoup) -> SERPFeatures:
html = str(soup)
else:
html = html_or_soup
- soup = webutils.make_soup(html)
+ soup = utils.make_soup(html)
# Extract result estimate count and time
rx_estimate = re.compile(r'.*?
')
@@ -32,9 +33,11 @@ def extract_features(html_or_soup: str | BeautifulSoup) -> SERPFeatures:
result_estimate_count = None
result_estimate_time = None
else:
- count_match = re.search(r'([0-9,]+) results', result_estimate_div)
- time_match = re.search(r'\(([0-9.]+)s?\s*(?:seconds)?\)', result_estimate_div)
- result_estimate_count = float(count_match.group(1).replace(",","")) if count_match else None
+ count_match = re.search(r"([0-9,]+) results", result_estimate_div)
+ time_match = re.search(r"\(([0-9.]+)s?\s*(?:seconds)?\)", result_estimate_div)
+ result_estimate_count = (
+ float(count_match.group(1).replace(",", "")) if count_match else None
+ )
result_estimate_time = float(time_match.group(1)) if time_match else None
# Extract language
@@ -48,18 +51,18 @@ def extract_features(html_or_soup: str | BeautifulSoup) -> SERPFeatures:
notice_no_results = bool(match)
string_match_dict = {
- 'notice_shortened_query': "(and any subsequent words) was ignored because we limit queries to 32 words.",
- 'notice_server_error': "We're sorry but it appears that there has been an internal server error while processing your request.",
- 'infinity_scroll': 'More results '
+ "notice_shortened_query": "(and any subsequent words) was ignored because we limit queries to 32 words.",
+ "notice_server_error": "We're sorry but it appears that there has been an internal server error while processing your request.",
+ "infinity_scroll": 'More results ',
}
string_matches = {key: (pattern in html) for key, pattern in string_match_dict.items()}
# Location prompt overlay (id="lb" with "precise location" text)
- lb = soup.find('div', {'id': 'lb'})
- overlay_precise_location = bool(lb and 'precise location' in lb.get_text().lower())
+ lb = soup.find("div", {"id": "lb"})
+ overlay_precise_location = bool(lb and "precise location" in lb.get_text().lower())
# CAPTCHA detection
- captcha = webutils.has_captcha(soup)
+ captcha = utils.has_captcha(soup)
return SERPFeatures(
result_estimate_count=result_estimate_count,
diff --git a/WebSearcher/locations.py b/WebSearcher/locations.py
index 86c9091..c3f052e 100644
--- a/WebSearcher/locations.py
+++ b/WebSearcher/locations.py
@@ -1,14 +1,15 @@
-import os
-import io
-import csv
import base64
+import csv
+import io
import zipfile
+from pathlib import Path
+from typing import Any
+
import requests
from google.protobuf.internal import decoder, encoder # uv add protobuf
-from typing import Any
-from . import logger
-from . import webutils as wu
+from . import logger, utils
+
log = logger.Logger().start(__name__)
@@ -20,7 +21,7 @@ def convert_canonical_name_to_uule(canon_name: str) -> str:
"""
fields = {1: 2, 2: 32, 4: canon_name}
encoded_string = encode_protobuf_string(fields)
- return f'w+{encoded_string}'
+ return f"w+{encoded_string}"
def encode_protobuf_string(fields: dict[int, str | int]) -> str:
@@ -33,18 +34,20 @@ def encode_protobuf_string(fields: dict[int, str | int]) -> str:
for field_number, value in fields.items():
wire_type = 2 if isinstance(value, str) else 0 # Determine wire type based on value type
- tag = field_number << 3 | wire_type # Combine field number and wire type into tag
- encoded.extend(encoder._VarintBytes(tag)) # Encode the tag into bytes
-
+ tag = field_number << 3 | wire_type # Combine field number and wire type into tag
+ encoded.extend(encoder._VarintBytes(tag)) # Encode the tag into bytes
+
# Encode the value based on wire type
if wire_type == 0:
- encoded.extend(encoder._VarintBytes(value)) # Encode the integer as varint
+ encoded.extend(encoder._VarintBytes(value)) # Encode the integer as varint
if wire_type == 2:
- value = value.encode('utf-8') # Convert string to bytes
+ value = value.encode("utf-8") # Convert string to bytes
encoded.extend(encoder._VarintBytes(len(value))) # Add length prefix
- encoded.extend(value) # Add the actual bytes
-
- return base64.b64encode(bytes(encoded)).decode('utf-8') # Convert to base64 and decode to string
+ encoded.extend(value) # Add the actual bytes
+
+ return base64.b64encode(bytes(encoded)).decode(
+ "utf-8"
+ ) # Convert to base64 and decode to string
def decode_protobuf_string(encoded_string: str) -> dict[int, Any]:
@@ -54,34 +57,44 @@ def decode_protobuf_string(encoded_string: str) -> dict[int, Any]:
Returns: dictionary where keys are protobuf field numbers and values are the decoded values
"""
- pos = 0 # Position tracker for decoding
- fields = {} # Dictionary to store decoded field numbers and values
+ pos = 0 # Position tracker for decoding
+ fields = {} # Dictionary to store decoded field numbers and values
- protobuf_bytes = base64.b64decode(encoded_string) # Convert to protobuf bytes
+ protobuf_bytes = base64.b64decode(encoded_string) # Convert to protobuf bytes
while pos < len(protobuf_bytes):
-
# Get field number and wire type
- tag, pos_new = decoder._DecodeVarint(protobuf_bytes, pos) # Each protobuf field starts with a varint tag
- field_number, wire_type = tag >> 3, tag & 7 # Extract field number and wire type from tag
-
+ tag, pos_new = decoder._DecodeVarint(
+ protobuf_bytes, pos
+ ) # Each protobuf field starts with a varint tag
+ field_number, wire_type = (
+ tag >> 3,
+ tag & 7,
+ ) # Extract field number and wire type from tag
+
# Decode value based on wire type (0: varint, 2: length-delimited; others not supported)
if wire_type == 0:
- value, pos_new = decoder._DecodeVarint(protobuf_bytes, pos_new) # Get the varint value and new position
+ value, pos_new = decoder._DecodeVarint(
+ protobuf_bytes, pos_new
+ ) # Get the varint value and new position
elif wire_type == 2:
- length, pos_start = decoder._DecodeVarint(protobuf_bytes, pos_new) # Get length and starting position
- value = protobuf_bytes[pos_start:pos_start + length] # Extract data based on the length
- pos_new = pos_start + length # Update the new position
- value = value.decode('utf-8') # Assume UTF-8 encoding for strings
-
- fields[field_number] = value # Store the field number and value in the dictionary
- pos = pos_new # Move to the next field using the updated position
+ length, pos_start = decoder._DecodeVarint(
+ protobuf_bytes, pos_new
+ ) # Get length and starting position
+ value = protobuf_bytes[
+ pos_start : pos_start + length
+ ] # Extract data based on the length
+ pos_new = pos_start + length # Update the new position
+ value = value.decode("utf-8") # Assume UTF-8 encoding for strings
+
+ fields[field_number] = value # Store the field number and value in the dictionary
+ pos = pos_new # Move to the next field using the updated position
return fields
def download_locations(
- data_dir: str = "data/locations",
- url: str = "https://developers.google.com/adwords/api/docs/appendix/geotargeting"
- ) -> None:
+ data_dir: str | Path = "data/locations",
+ url: str = "https://developers.google.com/adwords/api/docs/appendix/geotargeting",
+) -> None:
"""Download the latest geolocations, check if already exists locally first.
Args:
@@ -92,44 +105,45 @@ def download_locations(
None: Saves to file in the default or selected data_dir
"""
- os.makedirs(data_dir, exist_ok=True)
+ data_dir = Path(data_dir)
+ data_dir.mkdir(parents=True, exist_ok=True)
url_latest = get_latest_url(url)
- fp = os.path.join(data_dir, url_latest.split('/')[-1])
- fp_unzip = fp.replace('.zip', '')
+ fp = data_dir / url_latest.split("/")[-1]
+ fp_unzip = fp.with_suffix("")
# Check if the current version already exists
- if os.path.exists(fp):
+ if fp.exists():
print(f"Version up to date: {fp}")
- elif os.path.exists(fp_unzip):
+ elif fp_unzip.exists():
print(f"Version up to date: {fp_unzip}")
else:
- print(f"Version out of date")
+ print("Version out of date")
# Download and save
try:
- print(f'getting: {url_latest}')
+ print(f"getting: {url_latest}")
response = requests.get(url_latest)
except Exception:
- log.exception('Failed to retrieve location data')
+ log.exception("Failed to retrieve location data")
- if fp.endswith('.zip'):
- save_zip_response(response, fp_unzip)
+ if fp.suffix == ".zip":
+ save_zip_response(response, str(fp_unzip))
else:
- lines = response.content.decode('utf-8').split('\n')
- locations = [l for l in csv.reader(lines, delimiter=',')]
- write_csv(fp_unzip, locations)
+ lines = response.content.decode("utf-8").split("\n")
+ locations = list(csv.reader(lines, delimiter=","))
+ write_csv(str(fp_unzip), locations)
-def get_latest_url(url:str):
+def get_latest_url(url: str):
try:
html = requests.get(url).content
- soup = wu.make_soup(html)
- url_list = [url for url in wu.get_link_list(soup) if url and url != '']
- geo_urls = [url for url in url_list if 'geotargets' in url]
+ soup = utils.make_soup(html)
+ url_list = [url for url in utils.get_link_list(soup) if url and url != ""]
+ geo_urls = [url for url in url_list if "geotargets" in url]
# Get current CSV url and use as filename
geo_url = sorted(geo_urls)[-1]
- url_latest = 'https://developers.google.com' + geo_url
+ url_latest = "https://developers.google.com" + geo_url
return url_latest
except Exception:
@@ -139,18 +153,17 @@ def get_latest_url(url:str):
def save_zip_response(response: requests.Response, fp: str) -> None:
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
for member in zip_ref.namelist():
- if member.endswith('.csv'):
+ if member.endswith(".csv"):
with zip_ref.open(member) as csv_file:
- reader = csv.reader(io.TextIOWrapper(csv_file, 'utf-8'))
+ reader = csv.reader(io.TextIOWrapper(csv_file, "utf-8"))
write_csv(fp, reader=reader)
def write_csv(fp: str, lines: list = None, reader: csv.reader = None) -> None:
- with open(fp, 'w', encoding="utf-8") as outfile:
+ with open(fp, "w", encoding="utf-8") as outfile:
writer = csv.writer(outfile)
if reader:
writer.writerows(reader)
elif lines:
writer.writerows(lines)
print(f"saved: {fp}")
-
diff --git a/WebSearcher/logger.py b/WebSearcher/logger.py
index 44fa771..9bd2069 100644
--- a/WebSearcher/logger.py
+++ b/WebSearcher/logger.py
@@ -1,21 +1,21 @@
-""" Configure a logger using a dictionary
-"""
+"""Configure a logger using a dictionary"""
import logging.config
# Setting
-LOG_LEVEL_DEFAULT = 'INFO'
+LOG_LEVEL_DEFAULT = "INFO"
# Formatters: change what gets logged
-minimal = '%(message)s'
-medium = '%(asctime)s.%(msecs)01d | %(levelname)s | %(name)s | %(message)s'
-detailed = '%(asctime)s.%(msecs)01d | %(process)d | %(levelname)s | %(name)s | %(message)s'
+minimal = "%(message)s"
+medium = "%(asctime)s.%(msecs)01d | %(levelname)s | %(name)s | %(message)s"
+detailed = "%(asctime)s.%(msecs)01d | %(process)d | %(levelname)s | %(name)s | %(message)s"
formatters = {
- 'minimal': {'format': minimal},
- 'medium': {'format': medium, 'datefmt': '%Y-%m-%d %H:%M:%S'},
- 'detailed': {'format': detailed, 'datefmt': '%Y-%m-%d %H:%M:%S'}
+ "minimal": {"format": minimal},
+ "medium": {"format": medium, "datefmt": "%Y-%m-%d %H:%M:%S"},
+ "detailed": {"format": detailed, "datefmt": "%Y-%m-%d %H:%M:%S"},
}
+
class Logger:
"""
A configurable logger for console and file outputs.
@@ -27,14 +27,16 @@ class Logger:
start(name: Optional[str]): Initializes and retrieves the logger instance.
"""
- def __init__(self,
- console: bool = True,
- console_format: str = 'medium',
- console_level: str = LOG_LEVEL_DEFAULT,
- file_name: str = '',
- file_mode: str = 'w',
- file_format: str = 'detailed',
- file_level: str = LOG_LEVEL_DEFAULT) -> None:
+ def __init__(
+ self,
+ console: bool = True,
+ console_format: str = "medium",
+ console_level: str = LOG_LEVEL_DEFAULT,
+ file_name: str = "",
+ file_mode: str = "w",
+ file_format: str = "detailed",
+ file_level: str = LOG_LEVEL_DEFAULT,
+ ) -> None:
"""
Initializes the Logger configuration.
@@ -47,56 +49,58 @@ def __init__(self,
file_format (str): Format of the file logging. Should be either 'minimal' or 'detailed'.
file_level (str): Logging level for the file. Default is 'INFO'.
"""
-
+
# Handlers: change file and console logging details
handlers = {}
if console:
- assert console_format in formatters.keys(), \
- f'Console format must be one of {list(formatters.keys())}'
- handlers['console_handle'] = {
- 'class': 'logging.StreamHandler',
- 'level': console_level,
- 'formatter': console_format,
+ assert console_format in formatters.keys(), (
+ f"Console format must be one of {list(formatters.keys())}"
+ )
+ handlers["console_handle"] = {
+ "class": "logging.StreamHandler",
+ "level": console_level,
+ "formatter": console_format,
}
if file_name:
- assert type(file_name) is str, 'File name must be a string'
- assert file_format in formatters.keys(), \
- f'File format must be one of {list(formatters.keys())}'
- handlers['file_handle'] = {
- 'class': 'logging.FileHandler',
- 'level': file_level,
- 'formatter': file_format,
- 'filename': file_name,
- 'mode': file_mode
+ assert type(file_name) is str, "File name must be a string"
+ assert file_format in formatters.keys(), (
+ f"File format must be one of {list(formatters.keys())}"
+ )
+ handlers["file_handle"] = {
+ "class": "logging.FileHandler",
+ "level": file_level,
+ "formatter": file_format,
+ "filename": file_name,
+ "mode": file_mode,
}
-
+
# Loggers: change logging options for root and other packages
loggers = {
# Root logger
- '': {
- 'handlers': list(handlers.keys()),
- 'level': 'DEBUG',
- 'propagate': True
+ "": {
+ "handlers": list(handlers.keys()),
+ "level": "DEBUG",
+ "propagate": True,
},
# External loggers
- 'requests': {'level': 'WARNING'},
- 'urllib3': {'level': 'WARNING'},
- 'asyncio': {'level': 'INFO'},
- 'chardet.charsetprober': {'level': 'INFO'},
- 'parso': {'level': 'INFO'}, # Fix for ipython autocomplete bug
- 'undetected_chromedriver': {'level': 'WARNING'},
- 'uc': {'level': 'WARNING'},
+ "requests": {"level": "WARNING"},
+ "urllib3": {"level": "WARNING"},
+ "asyncio": {"level": "INFO"},
+ "chardet.charsetprober": {"level": "INFO"},
+ "parso": {"level": "INFO"}, # Fix for ipython autocomplete bug
+ "undetected_chromedriver": {"level": "WARNING"},
+ "uc": {"level": "WARNING"},
}
- self.log_config = {
- 'version': 1,
- 'disable_existing_loggers': False,
- 'formatters': formatters,
- 'handlers': handlers,
- 'loggers': loggers
+ self.log_config = {
+ "version": 1,
+ "disable_existing_loggers": False,
+ "formatters": formatters,
+ "handlers": handlers,
+ "loggers": loggers,
}
-
+
def start(self, name: str | None = __name__) -> logging.Logger:
logging.config.dictConfig(self.log_config)
- return logging.getLogger(name)
\ No newline at end of file
+ return logging.getLogger(name)
diff --git a/WebSearcher/models/cmpt_mappings.py b/WebSearcher/models/cmpt_mappings.py
index e255b5b..17cf704 100644
--- a/WebSearcher/models/cmpt_mappings.py
+++ b/WebSearcher/models/cmpt_mappings.py
@@ -110,9 +110,14 @@
"description": "Related questions that people search for",
"sub_types": [],
},
- "perspectives": {"description": "Opinion and perspective results", "sub_types": [
- "perspectives", "perspectives_&_opinions", "what_people_are_saying",
- ]},
+ "perspectives": {
+ "description": "Opinion and perspective results",
+ "sub_types": [
+ "perspectives",
+ "perspectives_&_opinions",
+ "what_people_are_saying",
+ ],
+ },
"scholarly_articles": {"description": "Google Scholar results", "sub_types": []},
"searches_related": {
"description": "Related search terms",
diff --git a/WebSearcher/models/configs.py b/WebSearcher/models/configs.py
index a408e56..69084e3 100644
--- a/WebSearcher/models/configs.py
+++ b/WebSearcher/models/configs.py
@@ -1,11 +1,13 @@
-import requests
import subprocess
from enum import Enum
+
+import requests
from pydantic import BaseModel, Field, computed_field
+
class BaseConfig(BaseModel):
"""Base class for all configuration classes"""
-
+
@classmethod
def create(cls, config=None):
"""Create a config instance from a dictionary or existing instance"""
@@ -13,31 +15,36 @@ def create(cls, config=None):
return cls(**config)
return config or cls()
+
class LogConfig(BaseConfig):
console: bool = True
- console_format: str = 'medium'
- console_level: str = 'INFO'
- file_name: str = ''
- file_mode: str = 'a'
- file_format: str = 'detailed'
- file_level: str = 'INFO'
+ console_format: str = "medium"
+ console_level: str = "INFO"
+ file_name: str = ""
+ file_mode: str = "a"
+ file_format: str = "detailed"
+ file_level: str = "INFO"
+
class SeleniumConfig(BaseConfig):
headless: bool = False
- version_main: int = 144
+ version_main: int | None = None
use_subprocess: bool = False
driver_executable_path: str = ""
+
class RequestsConfig(BaseConfig):
model_config = {"arbitrary_types_allowed": True}
- headers: dict[str, str] = Field(default_factory=lambda: {
- 'Host': 'www.google.com',
- 'Referer': 'https://www.google.com/',
- 'Accept': '*/*',
- 'Accept-Encoding': 'gzip,deflate,br',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0',
- })
+ headers: dict[str, str] = Field(
+ default_factory=lambda: {
+ "Host": "www.google.com",
+ "Referer": "https://www.google.com/",
+ "Accept": "*/*",
+ "Accept-Encoding": "gzip,deflate,br",
+ "Accept-Language": "en-US,en;q=0.5",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0",
+ }
+ )
ssh_tunnel: subprocess.Popen | None = None
unzip: bool = True
@@ -48,6 +55,7 @@ def sesh(self) -> requests.Session:
sesh.headers.update(self.headers)
return sesh
+
class SearchMethod(Enum):
REQUESTS = "requests"
SELENIUM = "selenium"
@@ -64,11 +72,14 @@ def create(cls, method=None):
return cls(method.lower())
except ValueError:
valid_values = [e.value for e in cls]
- raise ValueError(f"Invalid search method: {method}. Valid values are: {valid_values}")
+ raise ValueError(
+ f"Invalid search method: {method}. Valid values are: {valid_values}"
+ )
raise TypeError(f"Expected string or SearchMethod, got {type(method)}")
+
class SearchConfig(BaseConfig):
- method: str | SearchMethod = SearchMethod.SELENIUM
+ method: SearchMethod = SearchMethod.SELENIUM
log: LogConfig = Field(default_factory=LogConfig)
selenium: SeleniumConfig = Field(default_factory=SeleniumConfig)
requests: RequestsConfig = Field(default_factory=RequestsConfig)
diff --git a/WebSearcher/models/data.py b/WebSearcher/models/data.py
index a854228..0ebefab 100644
--- a/WebSearcher/models/data.py
+++ b/WebSearcher/models/data.py
@@ -1,56 +1,56 @@
from pydantic import BaseModel, Field
-from typing import Any
-from dataclasses import asdict, dataclass, field
-@dataclass
-class DetailsItem:
- """Represents a details item within a search result."""
- url: str = ''
- title: str = ''
- text: str = ''
- misc: dict = field(default_factory=dict)
- def to_dict(self) -> dict:
- return asdict(self)
+class ResponseOutput(BaseModel):
+ """Response data from a search request."""
+ html: str = ""
+ url: str = ""
+ user_agent: str = ""
+ response_code: int = 0
+ timestamp: str = ""
-class DetailsList(list):
- """A list of DetailsItem objects with conversion to dicts."""
- def append(self, item: DetailsItem):
- if not isinstance(item, DetailsItem):
- raise TypeError(f"Expected DetailsItem, got {type(item).__name__}")
- super().append(item)
+class ParsedSERP(BaseModel):
+ """Parsed output from a SERP."""
- def to_dicts(self) -> list[dict]:
- return [item.to_dict() for item in self]
+ crawl_id: str = ""
+ serp_id: str = ""
+ version: str = ""
+ method: str = ""
+ features: dict = Field(default_factory=dict)
+ results: list[dict] = Field(default_factory=list)
class BaseResult(BaseModel):
"""
Represents a single search result item extracted from a SERP.
-
+
Contains the structured data of one search result including its rank,
type, title, URL, and other metadata.
"""
+
sub_rank: int = Field(0, description="Position within a results component")
- type: str = Field('unclassified', description="Result type (general, ad, etc.)")
+ type: str = Field("unclassified", description="Result type (general, ad, etc.)")
sub_type: str | None = Field(None, description="Result sub-type (e.g., header, item)")
title: str | None = Field(None, description="Title of the search result")
url: str | None = Field(None, description="URL of the search result")
text: str | None = Field(None, description="Snippet text from the search result")
cite: str | None = Field(None, description="Citation or source information")
- details: Any | None = Field(None, description="Additional structured details specific to result type")
+ details: dict | None = Field(
+ None, description="Additional structured details specific to result type"
+ )
error: str | None = Field(None, description="Error message if result parsing failed")
class BaseSERP(BaseModel):
"""
Represents a complete Search Engine Results Page (SERP).
-
+
Contains all data related to a single search query including the query itself,
raw HTML response, metadata about the request, and identifiers for tracking.
"""
+
qry: str = Field(..., description="Search query")
loc: str | None = Field(None, description="Location if set, in Canonical Name format")
lang: str | None = Field(None, description="Language code if set")
diff --git a/WebSearcher/models/features.py b/WebSearcher/models/features.py
index bd4773d..b39b7fa 100644
--- a/WebSearcher/models/features.py
+++ b/WebSearcher/models/features.py
@@ -1,9 +1,9 @@
-from dataclasses import asdict, dataclass
+from pydantic import BaseModel
-@dataclass
-class SERPFeatures:
+class SERPFeatures(BaseModel):
"""Features extracted from a Search Engine Results Page (SERP)."""
+
result_estimate_count: float | None = None
result_estimate_time: float | None = None
language: str | None = None
@@ -13,6 +13,3 @@ class SERPFeatures:
infinity_scroll: bool = False
overlay_precise_location: bool = False
captcha: bool = False
-
- def to_dict(self) -> dict:
- return asdict(self)
diff --git a/WebSearcher/models/searches.py b/WebSearcher/models/searches.py
index 8335656..3dc19c2 100644
--- a/WebSearcher/models/searches.py
+++ b/WebSearcher/models/searches.py
@@ -1,16 +1,17 @@
-from pydantic import Field, computed_field
-from typing import Any
from datetime import datetime
+from typing import Any
+
+from pydantic import Field, computed_field
+from .. import locations, utils
from ..utils import hash_id
-from ..import webutils as wu
-from ..import locations
from .configs import BaseConfig
class SearchParams(BaseConfig):
"""Contains parameters for a search request and utility methods for URL generation"""
- qry: str = Field('', description="The search query text")
+
+ qry: str = Field("", description="The search query text")
num_results: int | None = Field(None, description="Number of results to return")
lang: str | None = Field(None, description="Language code (e.g., 'en')")
loc: str | None = Field(None, description="Location in Canonical Name format")
@@ -21,25 +22,25 @@ class SearchParams(BaseConfig):
@computed_field
def url_params(self) -> dict[str, Any]:
"""Generates a dictionary of URL parameters based on the search parameters"""
- params = {'q': wu.encode_param_value(self.qry)}
+ params = {"q": utils.encode_param_value(self.qry)}
opt_params = {
- 'num': self.num_results,
- 'hl': self.lang,
- 'uule': locations.convert_canonical_name_to_uule(self.loc) if self.loc else None,
+ "num": self.num_results,
+ "hl": self.lang,
+ "uule": locations.convert_canonical_name_to_uule(self.loc) if self.loc else None,
}
- opt_params = {k: v for k, v in opt_params.items() if v and v not in {'None', 'nan'}}
+ opt_params = {k: v for k, v in opt_params.items() if v and v not in {"None", "nan"}}
params.update(opt_params)
return params
-
+
@computed_field
def url(self) -> str:
"""Returns the fully formed search URL with all parameters"""
- return f"{self.base_url}?{wu.join_url_quote(self.url_params)}"
-
+ return f"{self.base_url}?{utils.join_url_quote(self.url_params)}"
+
@computed_field
def serp_id(self) -> str:
return hash_id(f"{self.qry}{self.loc}{datetime.now().isoformat()}")
-
+
def to_serp_output(self) -> dict[str, Any]:
return {
"qry": self.qry,
diff --git a/WebSearcher/parsers.py b/WebSearcher/parsers.py
index 513f2e0..4a70ac0 100644
--- a/WebSearcher/parsers.py
+++ b/WebSearcher/parsers.py
@@ -1,28 +1,24 @@
-from . import webutils
+from bs4 import BeautifulSoup
+
+from . import utils
from .extractors import Extractor
from .feature_extractor import FeatureExtractor
from .logger import Logger
-log = Logger().start(__name__)
-from bs4 import BeautifulSoup
+log = Logger().start(__name__)
-def parse_serp(
- serp: str | BeautifulSoup,
- extract_features: bool = False
- ) -> list[dict] | dict:
+def parse_serp(serp: str | BeautifulSoup) -> dict:
"""Parse a Search Engine Result Page (SERP)
Args:
serp: The HTML content of the SERP or a BeautifulSoup object
- extract_features: Whether to also extract SERP features. Defaults to False.
Returns:
- If extract_features is False, returns a list of result components.
- If extract_features is True, returns a dict with 'results' and 'features' keys.
+ A dict with 'results' and 'features' keys.
"""
# Extract components
- soup = webutils.make_soup(serp)
+ soup = utils.make_soup(serp)
extractor = Extractor(soup)
extractor.extract_components()
component_list = extractor.components
@@ -33,10 +29,7 @@ def parse_serp(
cmpt.parse_component()
results = component_list.export_component_results()
- if extract_features:
- return {
- "features": FeatureExtractor.extract_features(soup).to_dict(),
- "results": results
- }
-
- return results
+ return {
+ "features": FeatureExtractor.extract_features(soup).model_dump(),
+ "results": results,
+ }
diff --git a/WebSearcher/search_methods/requests_searcher.py b/WebSearcher/search_methods/requests_searcher.py
index 8541f10..f957270 100644
--- a/WebSearcher/search_methods/requests_searcher.py
+++ b/WebSearcher/search_methods/requests_searcher.py
@@ -1,18 +1,20 @@
import time
+from datetime import datetime, timezone
+
import brotli
import requests
-from datetime import datetime, timezone
-from typing import Any
from ..models.configs import RequestsConfig
+from ..models.data import ResponseOutput
from ..models.searches import SearchParams
+
class RequestsSearcher:
"""Handle Requests-based web interactions for search engines"""
-
+
def __init__(self, config: RequestsConfig, logger):
"""Initialize a Requests searcher with the given configuration
-
+
Args:
config: RequestsConfig instance
headers: Dictionary of HTTP headers
@@ -21,59 +23,55 @@ def __init__(self, config: RequestsConfig, logger):
self.config = config
self.log = logger
self.sesh = self.config.sesh or self._start_session()
-
+
def _start_session(self):
"""Start a new requests session with the configured headers"""
session = requests.Session()
session.headers.update(self.config.headers)
return session
-
- def send_request(self, search_params: SearchParams) -> dict[str, Any]:
+
+ def send_request(self, search_params: SearchParams) -> ResponseOutput:
"""Send a request and handle the response
-
+
Args:
search_params: SearchParams instance
- serp_id: Optional SERP ID
- crawl_id: Optional crawl ID
-
+
Returns:
- Dictionary with response data
+ ResponseOutput with response data
"""
if search_params.headers:
self.sesh.headers.update(search_params.headers)
-
- response_output = {
- 'html': '',
- 'url': search_params.url,
- 'user_agent': self.config.headers.get('User-Agent'),
- 'response_code': 0,
- 'timestamp': datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
- }
-
+
+ response_output = ResponseOutput(
+ url=search_params.url,
+ user_agent=self.config.headers.get("User-Agent", ""),
+ timestamp=datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
+ )
+
try:
response = self.sesh.get(search_params.url, timeout=10)
- response_output['html'] = self._handle_response_content(response)
- response_output['response_code'] = response.status_code
+ response_output.html = self._handle_response_content(response)
+ response_output.response_code = response.status_code
except requests.exceptions.ConnectionError:
- self.log.exception(f'Requests | Connection error')
+ self.log.exception("Requests | Connection error")
self._reset_ssh_tunnel()
except requests.exceptions.Timeout:
- self.log.exception(f'Requests | Timeout error')
+ self.log.exception("Requests | Timeout error")
except Exception:
- self.log.exception(f'Requests | Unknown error')
+ self.log.exception("Requests | Unknown error")
return response_output
def _handle_response_content(self, response):
try:
- if self.config.unzip:
+ if self.config.unzip:
html = self._unzip_html(response.content)
else:
html = response.content
- return html.decode('utf-8', 'ignore')
+ return html.decode("utf-8", "ignore")
except Exception:
- self.log.exception(f'Response handling error')
+ self.log.exception("Response handling error")
return response.content
def _unzip_html(self, content) -> bytes:
@@ -83,7 +81,7 @@ def _unzip_html(self, content) -> bytes:
except brotli.error:
return content
except Exception:
- self.log.exception(f'unzip error')
+ self.log.exception("unzip error")
return content
def _reset_ssh_tunnel(self):
@@ -91,5 +89,5 @@ def _reset_ssh_tunnel(self):
if self.config.ssh_tunnel:
self.config.ssh_tunnel.tunnel.kill()
self.config.ssh_tunnel.open_tunnel()
- self.log.info(f'SERP | Restarted SSH tunnel')
+ self.log.info("SERP | Restarted SSH tunnel")
time.sleep(10) # Allow time to establish connection
diff --git a/WebSearcher/search_methods/selenium_searcher.py b/WebSearcher/search_methods/selenium_searcher.py
index 92468d6..8fdd624 100644
--- a/WebSearcher/search_methods/selenium_searcher.py
+++ b/WebSearcher/search_methods/selenium_searcher.py
@@ -1,25 +1,26 @@
import time
-import orjson
from datetime import datetime, timezone
-from typing import Any
+import orjson
import undetected_chromedriver as uc
+from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
-from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
-from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.support.ui import WebDriverWait
from .. import utils
from ..models.configs import SeleniumConfig
+from ..models.data import ResponseOutput
from ..models.searches import SearchParams
+
class SeleniumDriver:
"""Handle Selenium-based web interactions for search engines"""
-
+
def __init__(self, config: SeleniumConfig, logger):
"""Initialize a Selenium driver with the given configuration
-
+
Args:
config (SeleniumConfig): Configuration for Selenium
logger: Logger instance
@@ -28,65 +29,65 @@ def __init__(self, config: SeleniumConfig, logger):
self.log = logger
self.driver = None
self.browser_info = {}
-
+
def init_driver(self) -> None:
"""Initialize Chrome driver with selenium-specific config"""
- self.log.debug(f'SERP | init uc chromedriver | kwargs: {self.config.__dict__}')
+ self.log.debug(f"SERP | init uc chromedriver | kwargs: {self.config.__dict__}")
self.driver = uc.Chrome(**self.config.__dict__)
-
+
# Log version information
self.browser_info = {
- 'browser_id': "",
- 'browser_name': self.driver.capabilities['browserName'],
- 'browser_version': self.driver.capabilities['browserVersion'],
- 'driver_version': self.driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0],
- 'user_agent': self.driver.execute_script('return navigator.userAgent'),
+ "browser_id": "",
+ "browser_name": self.driver.capabilities["browserName"],
+ "browser_version": self.driver.capabilities["browserVersion"],
+ "driver_version": self.driver.capabilities["chrome"]["chromedriverVersion"].split(" ")[
+ 0
+ ],
+ "user_agent": self.driver.execute_script("return navigator.userAgent"),
}
- self.browser_info['browser_id'] = utils.hash_id(orjson.dumps(self.browser_info).decode('utf-8'))
+ self.browser_info["browser_id"] = utils.hash_id(
+ orjson.dumps(self.browser_info).decode("utf-8")
+ )
self.log.debug(orjson.dumps(self.browser_info, option=orjson.OPT_INDENT_2))
-
+
def send_typed_query(self, query: str):
"""Send a typed query to the search box"""
time.sleep(2)
- self.driver.get('https://www.google.com')
+ self.driver.get("https://www.google.com")
time.sleep(2)
search_box = self.driver.find_element(By.ID, "APjFqb")
search_box.clear()
search_box.send_keys(query)
search_box.send_keys(Keys.RETURN)
-
- def send_request(self, search_params: SearchParams) -> dict[str, Any]:
+
+ def send_request(self, search_params: SearchParams) -> ResponseOutput:
"""Visit a URL with selenium and save HTML response"""
- response_output = {
- 'html': '',
- 'url': search_params.url,
- 'user_agent': self.browser_info['user_agent'],
- 'response_code': 0,
- 'timestamp': datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
- }
+ response_output = ResponseOutput(
+ url=search_params.url,
+ user_agent=self.browser_info.get("user_agent", ""),
+ timestamp=datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
+ )
try:
self.driver.get(search_params.url)
time.sleep(2)
- WebDriverWait(self.driver, 10).until(
- EC.presence_of_element_located((By.ID, "search"))
- )
+ WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.ID, "search")))
time.sleep(2)
- response_output['html'] = self.driver.page_source
- response_output['url'] = self.driver.current_url
- response_output['response_code'] = 200
+ response_output.html = self.driver.page_source
+ response_output.url = self.driver.current_url
+ response_output.response_code = 200
# Expand AI overview if requested
if search_params.ai_expand:
expanded_html = self.expand_ai_overview()
if expanded_html:
- len_diff = len(expanded_html) - len(response_output['html'])
+ len_diff = len(expanded_html) - len(response_output.html)
self.log.debug(f"SERP | expanded html | len diff: {len_diff}")
- response_output['html'] = expanded_html
+ response_output.html = expanded_html
except Exception as e:
- self.log.exception(f'SERP | Chromedriver error | {str(e)}')
+ self.log.exception(f"SERP | Chromedriver error | {str(e)}")
finally:
self.delete_cookies()
@@ -102,7 +103,7 @@ def expand_ai_overview(self):
show_more_button_exists = True
except NoSuchElementException:
show_more_button_exists = False
-
+
if show_more_button_exists:
try:
show_more_button = WebDriverWait(self.driver, 1).until(
@@ -111,42 +112,42 @@ def expand_ai_overview(self):
if show_more_button is not None:
show_more_button.click()
try:
- time.sleep(2) # Wait for additional content to load
+ time.sleep(2) # Wait for additional content to load
show_all_button = WebDriverWait(self.driver, 1).until(
EC.element_to_be_clickable((By.XPATH, show_all_button_xpath))
)
show_all_button.click()
except Exception:
pass
-
+
# Return expanded content
return self.driver.page_source
except Exception:
pass
-
+
return None
-
+
def cleanup(self) -> bool:
"""Clean up resources, particularly Selenium's browser instance
-
+
Returns:
bool: True if cleanup was successful or not needed, False if cleanup failed
"""
if self.driver:
try:
- self.delete_cookies()
- self.close_all_windows()
+ self.delete_cookies()
+ self.close_all_windows()
self.driver.quit()
self.driver = None
- self.log.debug(f'Browser successfully closed')
+ self.log.debug("Browser successfully closed")
return True
except Exception as e:
- self.log.warning(f'Failed to close browser: {e}')
+ self.log.warning(f"Failed to close browser: {e}")
self.driver = None
return False
return True
-
+
def close_all_windows(self):
try:
# Close all tabs/windows
@@ -158,7 +159,7 @@ def close_all_windows(self):
self.driver.close()
except Exception:
pass
-
+
def delete_cookies(self):
"""Delete all cookies from the browser"""
if self.driver:
@@ -166,7 +167,7 @@ def delete_cookies(self):
self.driver.delete_all_cookies()
except Exception as e:
self.log.warning(f"Failed to delete cookies: {str(e)}")
-
+
def __del__(self):
"""Destructor to ensure browser is closed when object is garbage collected"""
try:
diff --git a/WebSearcher/searchers.py b/WebSearcher/searchers.py
index bcf6e17..95799e5 100644
--- a/WebSearcher/searchers.py
+++ b/WebSearcher/searchers.py
@@ -1,28 +1,33 @@
-from . import parsers
-from . import utils
-from . import logger
-
-from .search_methods.selenium_searcher import SeleniumDriver
-from .search_methods.requests_searcher import RequestsSearcher
-
-from .models.configs import LogConfig, SeleniumConfig, RequestsConfig, SearchConfig, SearchMethod
+from importlib import metadata
+from pathlib import Path
+
+from . import logger, parsers, utils
+from .models.configs import (
+ LogConfig,
+ RequestsConfig,
+ SearchConfig,
+ SearchMethod,
+ SeleniumConfig,
+)
+from .models.data import BaseSERP, ParsedSERP
from .models.searches import SearchParams
-from .models.data import BaseSERP
+from .search_methods.requests_searcher import RequestsSearcher
+from .search_methods.selenium_searcher import SeleniumDriver
-import os
+WS_VERSION = metadata.version("WebSearcher")
-from importlib import metadata
-WS_VERSION = metadata.version('WebSearcher')
class SearchEngine:
"""Collect Search Engine Results Pages (SERPs)"""
- def __init__(self,
- method: str | SearchMethod = SearchMethod.SELENIUM,
- log_config: dict | LogConfig = {},
- selenium_config: dict | SeleniumConfig = {},
- requests_config: dict | RequestsConfig = {},
- crawl_id: str = '',
- ) -> None:
+
+ def __init__(
+ self,
+ method: str | SearchMethod = SearchMethod.SELENIUM,
+ log_config: dict | LogConfig = {},
+ selenium_config: dict | SeleniumConfig = {},
+ requests_config: dict | RequestsConfig = {},
+ crawl_id: str = "",
+ ) -> None:
"""Initialize the search engine
Args:
@@ -32,15 +37,17 @@ def __init__(self,
requests_config: Requests-specific configuration. Defaults to {}.
crawl_id: A unique identifier for the crawl. Defaults to ''.
"""
-
+
# Initialize config settings, log, and session data
self.method = method.value if isinstance(method, SearchMethod) else method
- self.config = SearchConfig.create({
- "method": SearchMethod.create(method),
- "log": LogConfig.create(log_config),
- "selenium": SeleniumConfig.create(selenium_config),
- "requests": RequestsConfig.create(requests_config),
- })
+ self.config = SearchConfig.create(
+ {
+ "method": SearchMethod.create(method),
+ "log": LogConfig.create(log_config),
+ "selenium": SeleniumConfig.create(selenium_config),
+ "requests": RequestsConfig.create(requests_config),
+ }
+ )
self.log = logger.Logger(**self.config.log.model_dump()).start(__name__)
self.session_data = {
"method": self.config.method.value,
@@ -57,18 +64,19 @@ def __init__(self,
# Initialize search params and output
self.search_params = SearchParams.create()
- self.parsed = {'results': [], 'features': {}}
-
- def search(self,
- qry: str,
- location: str | None = None,
- lang: str | None = None,
- num_results: int | None = None,
- ai_expand: bool = False,
- headers: dict[str, str] = {},
- ):
+ self.parsed = ParsedSERP()
+
+ def search(
+ self,
+ qry: str,
+ location: str | None = None,
+ lang: str | None = None,
+ num_results: int | None = None,
+ ai_expand: bool = False,
+ headers: dict[str, str] = {},
+ ):
"""Conduct a search and save HTML
-
+
Args:
qry: The search query
location: A location's Canonical Name
@@ -78,43 +86,53 @@ def search(self,
headers: Custom headers to include in the request
"""
- self.log.debug('starting search config')
- self.search_params = SearchParams.create({
- 'qry': str(qry),
- 'loc': str(location) if location is not None else '',
- 'lang': str(lang) if lang is not None else '',
- 'num_results': num_results,
- 'ai_expand': ai_expand,
- 'headers': headers,
- })
+ self.log.debug("starting search config")
+ self.search_params = SearchParams.create(
+ {
+ "qry": str(qry),
+ "loc": str(location) if location is not None else "",
+ "lang": str(lang) if lang is not None else "",
+ "num_results": num_results,
+ "ai_expand": ai_expand,
+ "headers": headers,
+ }
+ )
self.response_output = self.searcher.send_request(self.search_params)
serp_output = self.search_params.to_serp_output()
serp_output.update(self.session_data)
- serp_output.update(self.response_output)
+ serp_output.update(self.response_output.model_dump())
self.serp = BaseSERP(**serp_output).model_dump()
- self.log.info(" | ".join([f"{self.serp[k]}" for k in {'response_code','qry','loc'} if self.serp[k]]))
+ self.log.info(
+ " | ".join([f"{self.serp[k]}" for k in {"response_code", "qry", "loc"} if self.serp[k]])
+ )
# ==========================================================================
# Parsing
- def parse_serp(self, extract_features: bool = True):
+ def parse_serp(self):
try:
- parsed_metadata = {k:v for k,v in self.serp.items() if k in ['crawl_id', 'serp_id', 'version', 'method']}
- parsed = parsers.parse_serp(self.serp['html'], extract_features=extract_features)
- self.parsed = parsed_metadata | parsed
+ parsed = parsers.parse_serp(self.serp["html"])
+ self.parsed = ParsedSERP(
+ crawl_id=self.serp["crawl_id"],
+ serp_id=self.serp["serp_id"],
+ version=self.serp["version"],
+ method=self.serp["method"],
+ features=parsed["features"],
+ results=parsed["results"],
+ )
except Exception:
- self.log.exception(f'Parsing error | serp_id : {self.serp["serp_id"]}')
+ self.log.exception(f"Parsing error | serp_id : {self.serp['serp_id']}")
def parse_results(self):
"""Backwards compatibility for parsing results"""
self.parse_serp()
- self.results = self.parsed['results']
+ self.results = self.parsed.results
# ==========================================================================
# Saving
- def save_serp(self, save_dir: str = "", append_to: str = ""):
+ def save_serp(self, save_dir: str | Path = "", append_to: str | Path = ""):
"""Save SERP to file
Args:
@@ -127,34 +145,34 @@ def save_serp(self, save_dir: str = "", append_to: str = ""):
elif append_to:
utils.write_lines([self.serp], append_to)
elif save_dir:
- fp = os.path.join(save_dir, f'{self.serp["serp_id"]}.html')
- with open(fp, 'w') as outfile:
- outfile.write(self.serp['html'])
+ fp = Path(save_dir) / f"{self.serp['serp_id']}.html"
+ with open(fp, "w") as outfile:
+ outfile.write(self.serp["html"])
- def save_parsed(self, save_dir: str = "", append_to: str = ""):
+ def save_parsed(self, save_dir: str | Path = "", append_to: str | Path = ""):
"""Save parsed SERP to file"""
if not save_dir and not append_to:
self.log.warning("Must provide a save_dir or append_to file path to save parsed SERP")
return
- if not self.parsed:
+ if not self.parsed.results and not self.parsed.features:
self.log.warning("No parsed SERP available to save")
return
-
- fp = append_to if append_to else os.path.join(save_dir, 'parsed.json')
- utils.write_lines([self.parsed], fp)
- def save_search(self, append_to: str = ""):
+ fp = append_to if append_to else Path(save_dir) / "parsed.json"
+ utils.write_lines([self.parsed.model_dump()], fp)
+
+ def save_search(self, append_to: str | Path = ""):
"""Save SERP metadata (excludes HTML) to file"""
if not append_to:
self.log.warning("Must provide an append_to file path to save SERP metadata")
return
-
- self.serp_metadata = {k: v for k, v in self.serp.items() if k != 'html'}
+
+ self.serp_metadata = {k: v for k, v in self.serp.items() if k != "html"}
utils.write_lines([self.serp_metadata], append_to)
- def save_results(self, save_dir: str = "", append_to: str = ""):
+ def save_results(self, save_dir: str | Path = "", append_to: str | Path = ""):
"""Save parsed results
-
+
Args:
save_dir (str, optional): Save results as `save_dir/results/{serp_id}.json`
append_to (bool, optional): Append results to this file path
@@ -162,12 +180,12 @@ def save_results(self, save_dir: str = "", append_to: str = ""):
if not save_dir and not append_to:
self.log.warning("Must provide a save_dir or append_to file path to save results")
return
- if not self.parsed["results"]:
- self.log.warning(f'No parsed results to save')
+ if not self.parsed.results:
+ self.log.warning("No parsed results to save")
return
# Add metadata to results
- result_metadata = {k: self.serp[k] for k in ['crawl_id', 'serp_id', 'version']}
- results_output = [{**result, **result_metadata} for result in self.parsed["results"]]
- fp = append_to if append_to else os.path.join(save_dir, 'results.json')
+ result_metadata = {k: self.serp[k] for k in ["crawl_id", "serp_id", "version"]}
+ results_output = [{**result, **result_metadata} for result in self.parsed.results]
+ fp = append_to if append_to else Path(save_dir) / "results.json"
utils.write_lines(results_output, fp)
diff --git a/WebSearcher/utils.py b/WebSearcher/utils.py
index dec20e9..3c4b160 100644
--- a/WebSearcher/utils.py
+++ b/WebSearcher/utils.py
@@ -1,81 +1,306 @@
+import atexit
+import hashlib
import re
-import os
+import subprocess
+import urllib.parse as urlparse
+from collections.abc import Iterable, Mapping, Sequence
+from pathlib import Path
+from typing import Any
+
+import brotli
import orjson
-import random
-import hashlib
-import itertools
-from string import ascii_letters, digits
+import requests
+import tldextract
+from bs4 import BeautifulSoup
+from bs4.element import NavigableString, Tag
+
+from . import logger
+
+log = logger.Logger().start(__name__)
+
+SoupElement = BeautifulSoup | Tag | NavigableString
# Files ------------------------------------------------------------------------
-def all_abs_paths(dir):
- file_paths = []
- for folder, subs, files in os.walk(dir):
- for filename in files:
- file_paths.append(os.path.abspath(os.path.join(folder, filename)))
- return file_paths
-
-def read_lines(fp):
- try:
- is_json = '.json' in fp
- except TypeError:
- is_json = '.json' in fp.__fspath__()
-
- with open(fp, 'r') as infile:
- if is_json:
+
+def read_lines(fp: str | Path):
+ fp = Path(fp)
+ with open(fp) as infile:
+ if fp.suffix == ".json":
return [orjson.loads(line) for line in infile]
else:
return [line.strip() for line in infile]
-def write_lines(iter_data, fp, overwrite=False):
- mode = 'w' if overwrite else 'a+'
- try:
- is_json = '.json' in fp
- except TypeError:
- is_json = 'json' in fp.__fspath__()
+def write_lines(iter_data, fp: str | Path, overwrite=False):
+ fp = Path(fp)
+ mode = "w" if overwrite else "a+"
with open(fp, mode) as outfile:
for data in iter_data:
- if is_json:
- line_output = orjson.dumps(data).decode('utf-8')
+ if fp.suffix == ".json":
+ line_output = orjson.dumps(data).decode("utf-8")
else:
line_output = data
outfile.write(f"{line_output}\n")
-# Lists ------------------------------------------------------------------------
+def load_html(fp: str | Path, zipped: bool = False) -> str | bytes:
+ """Load html file, with option for brotli decompression"""
+ read_type = "rb" if zipped else "r"
+ with open(fp, read_type) as infile:
+ return brotli.decompress(infile.read()) if zipped else infile.read()
+
+
+def load_soup(fp: str | Path, zipped: bool = False) -> BeautifulSoup:
+ return make_soup(load_html(fp, zipped))
-def unlist(nested_list):
- return list(itertools.chain.from_iterable(nested_list))
# Strings ----------------------------------------------------------------------
-def split_by_spaces(s, n=2):
- # Split a string by n or more spaces
- return re.split(r'\s{%d,}' % n, s)
-def get_between_brackets(s, regex=r'\[(.*?)\]'):
+def get_between_parentheses(s, regex=r"\((.*?)\)"):
return re.search(regex, s).group(1)
-def get_between_parentheses(s, regex=r'\((.*?)\)'):
- return re.search(regex, s).group(1)
-
-def remove_digits(string):
- return "".join([x for x in string if not x.isdigit()]).strip()
-# Misc -------------------------------------------------------------------------
+# Hashing ----------------------------------------------------------------------
+
+
+def hash_id(s):
+ return hashlib.sha224(s.encode("utf-8")).hexdigest()
+
+
+# Parsing ----------------------------------------------------------------------
+
+
+def make_soup(html: str | bytes | BeautifulSoup, parser: str = "lxml") -> BeautifulSoup:
+ """Create soup object"""
+ if isinstance(html, BeautifulSoup):
+ return html
+ else:
+ return BeautifulSoup(html, parser)
+
+
+def has_captcha(soup: BeautifulSoup) -> bool:
+ """Boolean for 'CAPTCHA' appearance in soup"""
+ return True if soup.find(string=re.compile("CAPTCHA")) else False
+
+
+def check_dict_value(d: Mapping[str, Any], key: str, value: Any) -> bool:
+ """Check if a key exists in a dictionary and is equal to a input value"""
+ return (d[key] == value) if key in d else False
+
+
+# Get divs, links, and text ----------------------------------------------------
+
+
+def get_div(
+ soup: Tag | None,
+ name: str | None,
+ attrs: Mapping[str, Any] | None = None,
+) -> SoupElement | None:
+ """Utility for `soup.find(name)` with null attrs handling"""
+ if not soup:
+ return None
+ return soup.find(name, attrs) if attrs else soup.find(name)
+
+
+def get_text(
+ soup: Tag | None,
+ name: str | None = None,
+ attrs: Mapping[str, Any] | None = None,
+ separator: str = " ",
+ strip: bool = False,
+) -> str | None:
+ """Utility for `soup.find(name).text` with null name handling"""
+ if not soup:
+ return None
+ div = get_div(soup, name, attrs) if name else soup
+ if not div:
+ return None
+ text = div.get_text(separator=separator)
+ return text.strip() if strip else text
+
+
+def get_link(
+ soup: Tag | None, attrs: Mapping[str, Any] | None = None, key: str = "href"
+) -> str | None:
+ """Utility for `soup.find('a')['href']` with null key handling"""
+ link = get_div(soup, "a", attrs)
+ return link.attrs.get(key, None) if link else None
+
+
+def get_link_list(
+ soup: Tag | None,
+ attrs: Mapping[str, Any] | None = None,
+ key: str = "href",
+ filter_empty: bool = True,
+) -> list[str] | None:
+ """Utility for `soup.find_all('a')['href']` with null key handling"""
+ links = find_all_divs(soup, "a", attrs, filter_empty)
+ return [link.attrs.get(key, None) for link in links] if links else None
+
+
+def get_text_by_selectors(
+ soup: Tag | None,
+ selectors: Sequence[tuple[str, Mapping[str, Any]]] | None = None,
+ strip: bool = False,
+) -> str | None:
+ """Get text by trying multiple selectors, return first non-null"""
+ if not soup or not selectors:
+ return None
+ for name, attrs in selectors:
+ text = get_text(soup, name, attrs, strip=strip)
+ if text:
+ return text
+ return None
+
+
+def find_all_divs(
+ soup: Tag | None,
+ name: str,
+ attrs: Mapping[str, Any] | None = None,
+ filter_empty: bool = True,
+) -> list[SoupElement]:
+ if not soup:
+ return []
+ divs = soup.find_all(name, attrs) if attrs else soup.find_all(name)
+ divs = filter_empty_divs(divs) if filter_empty else divs
+ return list(divs)
+
+
+def filter_empty_divs(divs: Iterable[SoupElement]) -> list[SoupElement]:
+ filtered: list[SoupElement] = []
+ for candidate in divs:
+ if not candidate:
+ continue
+ text_content = candidate.text if hasattr(candidate, "text") else str(candidate)
+ if text_content.strip() != "":
+ filtered.append(candidate)
+ return filtered
+
+
+def find_children(
+ soup: BeautifulSoup | None,
+ name: str,
+ attrs: Mapping[str, Any] | None = None,
+ filter_empty: bool = False,
+) -> Iterable[SoupElement]:
+ """Find all children of a div with a given name and attribute"""
+ div = get_div(soup, name, attrs)
+ divs = div.children if div else []
+ divs = filter_empty_divs(divs) if filter_empty else divs
+ return divs
+
+
+# URLs -------------------------------------------------------------------------
+
+
+def join_url_quote(quote_dict: Mapping[str, str]) -> str:
+ return "&".join([f"{k}={v}" for k, v in quote_dict.items()])
+
+
+def encode_param_value(value: str) -> str:
+ return urlparse.quote_plus(value)
+
+
+def url_unquote(url: str) -> str:
+ return urlparse.unquote(url)
+
+
+def get_domain(url: str | None) -> str:
+ """Extract a full domain from a url, drop www"""
+ if not url:
+ return ""
+ domain = tldextract.extract(url)
+ without_subdomain = ".".join([domain.domain, domain.suffix])
+ with_subdomain = ".".join([domain.subdomain, domain.domain, domain.suffix])
+ if domain.subdomain:
+ domain_str = without_subdomain if domain.subdomain == "www" else with_subdomain
+ else:
+ domain_str = without_subdomain
+ return domain_str
+
+
+# Sessions ---------------------------------------------------------------------
+
+
+def start_sesh(
+ headers: Mapping[str, str] | None = None,
+ proxy_port: int | None = None,
+) -> requests.Session:
+ protocols = ["http", "https"]
+ proxy_base = "socks5://127.0.0.1:"
+
+ sesh = requests.Session()
+
+ if headers: # Add headers to all requests
+ sesh.headers.update(headers)
+
+ if proxy_port: # Send all requests through an ssh tunnel
+ proxies = {p: f"{proxy_base}{proxy_port}" for p in protocols}
+ sesh.proxies.update(proxies)
+
+ for protocol in protocols: # Auto retry if random connection error
+ sesh.mount(protocol, requests.adapters.HTTPAdapter(max_retries=3))
+
+ return sesh
+
+
+# SSH --------------------------------------------------------------------------
+
+
+class SSH:
+ """Create SSH cmd and tunnel objects"""
+
+ def __init__(
+ self,
+ user: str = "ubuntu",
+ port: int = 6000,
+ ip: str = "",
+ keyfile: str = "",
+ ) -> None:
+ self.user = user
+ self.keyfile = keyfile
+ self.port = port
+ self.ip = ip
+ self.machine = f"{self.user}@{self.ip}"
+ self.cmd = [
+ "ssh",
+ "-i",
+ self.keyfile,
+ "-ND",
+ f"127.0.0.1:{self.port}",
+ "-o",
+ "StrictHostKeyChecking=no",
+ self.machine,
+ ]
+ self.cmd_str = " ".join(self.cmd)
+ self.tunnel: subprocess.Popen[bytes] | None = None
+
+ def open_tunnel(self) -> None:
+ self.tunnel = subprocess.Popen(self.cmd, shell=False)
+
+
+def generate_ssh_tunnels(
+ ips: Sequence[str],
+ ports: Sequence[int],
+ keyfile: str,
+) -> list[SSH]:
+ """Generate SSH tunnels for each (IP, port) combination"""
-def hash_id(s):
- return hashlib.sha224(s.encode('utf-8')).hexdigest()
+ def generate_ssh_tunnel(ip: str, port: int, keyfile: str = keyfile) -> SSH:
+ ssh_tunnel = SSH(ip=ip, port=port, keyfile=keyfile)
+ subprocess.call(["chmod", "600", keyfile])
+ log.info(f"{ssh_tunnel.cmd_str}")
+ ssh_tunnel.open_tunnel()
+ atexit.register(exit_handler, ssh_tunnel) # Always kill tunnels on exit
+ return ssh_tunnel
-def make_id():
- return hashlib.sha224(random_string().encode('utf-8')).hexdigest()
+ return [generate_ssh_tunnel(ip, port) for ip, port in zip(ips, ports)]
-def alphanumerics():
- """Generate upper and lowercase letters and digits"""
- return ascii_letters + digits
-def random_string(length=12):
- """Generate a random string of alphanumerics"""
- return ''.join(random.choice(alphanumerics()) for i in range(length))
+def exit_handler(ssh: SSH) -> None:
+ log.info(f"Killing: {ssh.machine} on port: {ssh.port}")
+ if ssh.tunnel:
+ ssh.tunnel.kill()
diff --git a/WebSearcher/webutils.py b/WebSearcher/webutils.py
deleted file mode 100644
index 563b1c0..0000000
--- a/WebSearcher/webutils.py
+++ /dev/null
@@ -1,338 +0,0 @@
-"""webutils (wu): A useful collection of web utilities
-
-Note on using socks5h, hostname resolution
-https://stackoverflow.com/questions/12601316/how-to-make-python-requests-work-via-socks-proxy
-"""
-
-from . import utils
-from . import logger
-
-log = logger.Logger().start(__name__)
-
-import os
-import re
-import atexit
-import brotli
-import requests
-import subprocess
-import tldextract
-import urllib.parse as urlparse
-from collections.abc import Iterable, Mapping, Sequence
-from typing import Any
-from bs4 import BeautifulSoup
-from bs4.element import NavigableString, Tag
-
-SoupElement = BeautifulSoup | Tag | NavigableString
-
-
-def load_html(fp: str | os.PathLike[str], zipped: bool = False) -> str | bytes:
- """Load html file, with option for brotli decompression"""
- read_func = lambda i: brotli.decompress(i.read()) if zipped else i.read()
- read_type = "rb" if zipped else "r"
- with open(fp, read_type) as infile:
- return read_func(infile)
-
-
-def load_soup(fp: str | os.PathLike[str], zipped: bool = False) -> BeautifulSoup:
- return make_soup(load_html(fp, zipped))
-
-
-def start_sesh(
- headers: Mapping[str, str] | None = None,
- proxy_port: int | None = None,
-) -> requests.Session:
- protocols = ["http", "https"]
- proxy_base = "socks5://127.0.0.1:"
-
- sesh = requests.Session()
-
- if headers: # Add headers to all requests
- sesh.headers.update(headers)
-
- if proxy_port: # Send all requests through an ssh tunnel
- proxies = {p: f"{proxy_base}{proxy_port}" for p in protocols}
- sesh.proxies.update(proxies)
-
- for protocol in protocols: # Auto retry if random connection error
- sesh.mount(protocol, requests.adapters.HTTPAdapter(max_retries=3))
-
- return sesh
-
-
-# Misc -------------------------------------------------------------------------
-
-
-def check_dict_value(d: Mapping[str, Any], key: str, value: Any) -> bool:
- """Check if a key exists in a dictionary and is equal to a input value"""
- return (d[key] == value) if key in d else False
-
-
-# Parsing ----------------------------------------------------------------------
-
-
-def strip_html_tags(string: str) -> str:
- """Strips HTML """
- return re.sub("<[^<]+?>", "", string)
-
-
-def make_soup(html: str | bytes | BeautifulSoup, parser: str = "lxml") -> BeautifulSoup:
- """Create soup object"""
- if isinstance(html, BeautifulSoup):
- return html
- else:
- return BeautifulSoup(html, parser)
-
-
-def has_captcha(soup: BeautifulSoup) -> bool:
- """Boolean for 'CAPTCHA' appearance in soup"""
- return True if soup.find(string=re.compile("CAPTCHA")) else False
-
-
-def get_html_language(soup: BeautifulSoup) -> str:
- try:
- language = soup.html.attrs["lang"]
- except Exception:
- language = ""
- return language
-
-
-def parse_hashtags(text: str) -> list[str]:
- """Extract unique hashtags and strip surrounding punctuation"""
- hashtags = set([w for w in text.split() if w.startswith("#")])
- hashtags = [re.sub(r"(\W+)$", "", h, flags=re.UNICODE) for h in hashtags]
- return list(set(hashtags))
-
-
-def parse_lang(soup: BeautifulSoup) -> str | None:
- """Parse language from html tags"""
- try:
- return soup.find("html").attrs["lang"]
- except Exception as e:
- log.exception("Error while parsing language")
- return None
-
-
-# Get divs, links, and text ----------------------------------------------------
-
-
-def get_div(
- soup: Tag | None,
- name: str | None,
- attrs: Mapping[str, Any] | None = None,
-) -> SoupElement | None:
- """Utility for `soup.find(name)` with null attrs handling"""
- if not soup:
- return None
- return soup.find(name, attrs) if attrs else soup.find(name)
-
-
-def get_text(
- soup: Tag | None,
- name: str | None = None,
- attrs: Mapping[str, Any] | None = None,
- separator: str = " ",
- strip: bool = False,
-) -> str | None:
- """Utility for `soup.find(name).text` with null name handling"""
- if not soup:
- return None
- div = get_div(soup, name, attrs) if name else soup
- if not div:
- return None
- text = div.get_text(separator=separator)
- return text.strip() if strip else text
-
-
-def get_link(
- soup: Tag | None, attrs: Mapping[str, Any] | None = None, key: str = "href"
-) -> str | None:
- """Utility for `soup.find('a')['href']` with null key handling"""
- link = get_div(soup, "a", attrs)
- return link.attrs.get(key, None) if link else None
-
-
-def get_link_list(
- soup: Tag | None,
- attrs: Mapping[str, Any] | None = None,
- key: str = "href",
- filter_empty: bool = True,
-) -> list[str] | None:
- """Utility for `soup.find_all('a')['href']` with null key handling"""
- links = find_all_divs(soup, "a", attrs, filter_empty)
- return [link.attrs.get(key, None) for link in links] if links else None
-
-
-def get_text_by_selectors(
- soup: Tag | None,
- selectors: Sequence[tuple[str, Mapping[str, Any]]] | None = None,
- strip: bool = False,
-) -> str | None:
- """Get text by trying multiple selectors, return first non-null"""
- if not soup or not selectors:
- return None
- for name, attrs in selectors:
- text = get_text(soup, name, attrs, strip=strip)
- if text:
- return text
- return None
-
-
-def find_all_divs(
- soup: Tag | None,
- name: str,
- attrs: Mapping[str, Any] | None = None,
- filter_empty: bool = True,
-) -> list[SoupElement]:
- if not soup:
- return []
- divs = soup.find_all(name, attrs) if attrs else soup.find_all(name)
- divs = filter_empty_divs(divs) if filter_empty else divs
- return list(divs)
-
-
-def filter_empty_divs(divs: Iterable[SoupElement]) -> list[SoupElement]:
- filtered: list[SoupElement] = []
- for candidate in divs:
- if not candidate:
- continue
- text_content = candidate.text if hasattr(candidate, "text") else str(candidate)
- if text_content.strip() != "":
- filtered.append(candidate)
- return filtered
-
-
-def find_children(
- soup: BeautifulSoup | None,
- name: str,
- attrs: Mapping[str, Any] | None = None,
- filter_empty: bool = False,
-) -> Iterable[SoupElement]:
- """Find all children of a div with a given name and attribute"""
- div = get_div(soup, name, attrs)
- divs = div.children if div else []
- divs = filter_empty_divs(divs) if filter_empty else divs
- return divs
-
-
-# URLs -------------------------------------------------------------------------
-
-
-def join_url_quote(quote_dict: Mapping[str, str]) -> str:
- return "&".join([f"{k}={v}" for k, v in quote_dict.items()])
-
-
-def encode_param_value(value: str) -> str:
- return urlparse.quote_plus(value)
-
-
-def url_unquote(url: str) -> str:
- return urlparse.unquote(url)
-
-
-def get_domain(url: str | None) -> str:
- """Extract a full domain from a url, drop www"""
- if not url:
- return ""
- domain = tldextract.extract(url)
- without_subdomain = ".".join([domain.domain, domain.suffix])
- with_subdomain = ".".join([domain.subdomain, domain.domain, domain.suffix])
- if domain.subdomain:
- domain_str = without_subdomain if domain.subdomain == "www" else with_subdomain
- else:
- domain_str = without_subdomain
- return domain_str
-
-
-# Misc -------------------------------------------------------------------------
-
-
-def extract_html_json(
- data_fp: str | os.PathLike[str],
- extract_to: str | os.PathLike[str],
- id_col: str,
-) -> None:
- """Save HTML to directory for viewing"""
- os.makedirs(extract_to, exist_ok=True)
- data = utils.read_lines(data_fp)
- for row in data:
- fp = os.path.join(extract_to, row[id_col] + ".html")
- with open(fp, "wb") as outfile:
- outfile.write(row["html"])
-
-
-def split_styles(soup: BeautifulSoup) -> list[str] | None:
- """Extract embedded CSS"""
-
- def split_style(style):
- if style.string:
- return style.string.replace("}", "}\n").split("\n")
- else:
- return None
-
- styles = soup.find_all("style")
- if styles:
- style_chunks = [
- chunk for chunk in map(split_style, styles) if chunk is not None
- ]
- return sum(style_chunks, [])
- else:
- return None
-
-
-# SSH -------------------------------------------------------------------------
-
-
-class SSH:
- """Create SSH cmd and tunnel objects"""
-
- def __init__(
- self,
- user: str = "ubuntu",
- port: int = 6000,
- ip: str = "",
- keyfile: str = "",
- ) -> None:
- self.user = user
- self.keyfile = keyfile
- self.port = port
- self.ip = ip
- self.machine = f"{self.user}@{self.ip}"
- self.cmd = [
- "ssh",
- "-i",
- self.keyfile,
- "-ND",
- f"127.0.0.1:{self.port}",
- "-o",
- "StrictHostKeyChecking=no",
- self.machine,
- ]
- self.cmd_str = " ".join(self.cmd)
- self.tunnel: subprocess.Popen[bytes] | None = None
-
- def open_tunnel(self) -> None:
- self.tunnel = subprocess.Popen(self.cmd, shell=False)
-
-
-def generate_ssh_tunnels(
- ips: Sequence[str],
- ports: Sequence[int],
- keyfile: str,
-) -> list[SSH]:
- """Generate SSH tunnels for each (IP, port) combination"""
-
- def generate_ssh_tunnel(ip: str, port: int, keyfile: str = keyfile) -> SSH:
- ssh_tunnel = SSH(ip=ip, port=port, keyfile=keyfile)
- subprocess.call(["chmod", "600", keyfile])
- log.info(f"{ssh_tunnel.cmd_str}")
- ssh_tunnel.open_tunnel()
- atexit.register(exit_handler, ssh_tunnel) # Always kill tunnels on exit
- return ssh_tunnel
-
- return [generate_ssh_tunnel(ip, port) for ip, port in zip(ips, ports)]
-
-
-def exit_handler(ssh: SSH) -> None:
- log.info(f"Killing: {ssh.machine} on port: {ssh.port}")
- if ssh.tunnel:
- ssh.tunnel.kill()
diff --git a/pyproject.toml b/pyproject.toml
index d5dd735..edcb86b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "WebSearcher"
-version = "0.6.9"
+version = "0.7.0"
description = "Tools for conducting, collecting, and parsing web search"
authors = [{name = "Ronald E. Robertson", email = "rer@acm.org"}]
keywords = ["web", "search", "parser"]
@@ -26,6 +26,7 @@ repository = "http://github.com/gitronald/WebSearcher"
[project.scripts]
demo-search = 'scripts.demo_search:app'
+demo-searches = 'scripts.demo_searches:app'
[dependency-groups]
dev = [
@@ -37,9 +38,39 @@ dev = [
"polars>=1.37.1",
"setuptools>=80.9.0",
"tabulate>=0.9.0",
- "pandas>=2.2.3",
+ "pre-commit>=4.5.1",
+ "ruff>=0.15.6",
+ "pytest-cov>=7.0.0",
]
+[tool.pytest.ini_options]
+addopts = "--cov=WebSearcher --cov-report=term-missing"
+
+[tool.coverage.run]
+source = ["WebSearcher"]
+omit = ["WebSearcher/search_methods/*"]
+
+[tool.coverage.report]
+show_missing = true
+skip_empty = true
+
+[tool.ruff]
+target-version = "py310"
+line-length = 100
+
+[tool.ruff.lint]
+select = [
+ "F", # pyflakes
+ "E", # pycodestyle errors
+ "W", # pycodestyle warnings
+ "I", # isort (import sorting)
+ "UP", # pyupgrade (modern Python idioms)
+]
+ignore = ["E501"] # line length handled by ruff format
+
+[tool.ruff.lint.isort]
+known-first-party = ["WebSearcher"]
+
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
diff --git a/scripts/demo_locations.py b/scripts/demo_locations.py
index 655f142..97661a7 100644
--- a/scripts/demo_locations.py
+++ b/scripts/demo_locations.py
@@ -1,100 +1,84 @@
-""" Download and use locations
-"""
-import os
-import pandas as pd
+"""Download and use locations"""
+
+from pathlib import Path
+
+import polars as pl
+
import WebSearcher as ws
-# Retrieve and save latest location data
-data_dir = 'data/google_locations'
-os.makedirs(data_dir, exist_ok=True)
+# Retrieve and save latest location data
+data_dir = Path("data/google_locations")
+data_dir.mkdir(parents=True, exist_ok=True)
ws.download_locations(data_dir)
# Read it back in
-f = os.listdir(data_dir)[-1] # Last file
-fp = os.path.join(data_dir, f) # File path
-locs = pd.read_csv(fp) # Read
+f = sorted(data_dir.iterdir())[-1] # Last file
+locs = pl.read_csv(f)
-# locs.info()
+# locs.schema
#
-#
-# RangeIndex: 102029 entries, 0 to 102028
-# Data columns (total 7 columns):
-# Criteria ID 102029 non-null int64
-# Name 102029 non-null object
-# Canonical Name 102029 non-null object
-# Parent ID 101788 non-null float64
-# Country Code 102013 non-null object
-# Target Type 102029 non-null object
-# Status 102029 non-null object
-# dtypes: float64(1), int64(1), object(5)
-# memory usage: 5.4+ MB
-
-# locs.iloc[0]
+# Schema([('Criteria ID', Int64),
+# ('Name', String),
+# ('Canonical Name', String),
+# ('Parent ID', Int64),
+# ('Country Code', String),
+# ('Target Type', String),
+# ('Status', String)])
+
+# locs.row(0, named=True)
#
-# Criteria ID 1000002
-# Name Kabul
-# Canonical Name Kabul,Kabul,Afghanistan
-# Parent ID 9.07539e+06
-# Country Code AF
-# Target Type City
-# Status Active
-# Name: 0, dtype: object
+# {'Criteria ID': 1000002,
+# 'Name': 'Kabul',
+# 'Canonical Name': 'Kabul,Kabul,Afghanistan',
+# 'Parent ID': 9075394,
+# 'Country Code': 'AF',
+# 'Target Type': 'City',
+# 'Status': 'Active'}
# Looking for Canonical Names
-## Masks
-regex = r'(?=.*Boston)(?=.*Massachusetts)' # Has Boston and Massachusetts
-str_mask = locs['Canonical Name'].str.contains(regex)
-
-locs[str_mask]
-# 5368 Boston,England,United Kingdom
+## Filter
+regex = r"(?=.*Boston)(?=.*Massachusetts)" # Has Boston and Massachusetts
+matches = locs.filter(pl.col("Canonical Name").str.contains(regex))
+print(matches.select("Canonical Name"))
# 15849 Boston,Massachusetts,United States
# 15908 East Boston,Massachusetts,United States
-# 17201 New Boston,Michigan,United States
-# 19636 New Boston,New Hampshire,United States
-# 24368 New Boston,Texas,United States
-# 24763 Boston,Virginia,United States
-# 25003 South Boston,Virginia,United States
# 66033 Boston Logan International Airport,Massachusetts,United States
-# 66181 Manchester-Boston Regional Airport,New Hampshire,United States
# 84817 Boston College,Massachusetts,United States
-# 85140 Boston Ave - Mill Hill,Connecticut,United States
# 85985 South Boston,Massachusetts,United States
-# Name: Canonical Name, dtype: object
# Set Canonical Name
-canon_name = 'Boston,Massachusetts,United States'
+canon_name = "Boston,Massachusetts,United States"
# Get corresponding row
-name = locs[locs['Canonical Name'] == canon_name].iloc[0]
-name
+name = locs.filter(pl.col("Canonical Name") == canon_name).row(0, named=True)
+print(name)
-# Criteria ID 1018127
-# Name Boston
-# Canonical Name Boston,Massachusetts,United States
-# Parent ID 21152
-# Country Code US
-# Target Type City
-# Status Active
-# Name: 15849, dtype: object
+# {'Criteria ID': 1018127,
+# 'Name': 'Boston',
+# 'Canonical Name': 'Boston,Massachusetts,United States',
+# 'Parent ID': 21152,
+# 'Country Code': 'US',
+# 'Target Type': 'City',
+# 'Status': 'Active'}
# Initialize crawler
se = ws.SearchEngine()
# Conduct Search
-qry = 'pizza'
+qry = "pizza"
se.search(qry, location=canon_name)
# Parse Results
se.parse_results()
-# Shape as dataframe
+# Print results
if se.results:
- results = pd.DataFrame(se.results)
- with pd.option_context('display.max_colwidth', 80):
- print(results[['type', 'title']])
+ df = pl.DataFrame(se.results)
+ with pl.Config(fmt_str_lengths=80):
+ print(df.select("type", "title"))
# type title
# local_results FLORINA Pizzeria & Paninoteca
@@ -112,9 +96,9 @@
# general New Market Pizza - Boston, Boston, MA
# general Home | Regina Pizzeria, Boston's Brick Oven Pizza - Boston
# searches_related None
-# knowledge
+# knowledge
-dir_html = os.path.join("data", 'html')
-os.makedirs(dir_html, exist_ok=True)
-se.save_search(append_to=os.path.join(dir_html, "searches.json"))
+dir_html = Path("data/html")
+dir_html.mkdir(parents=True, exist_ok=True)
+se.save_search(append_to=dir_html / "searches.json")
se.save_serp(save_dir=dir_html)
diff --git a/scripts/demo_parse.py b/scripts/demo_parse.py
index 40bd485..07de18b 100644
--- a/scripts/demo_parse.py
+++ b/scripts/demo_parse.py
@@ -1,27 +1,23 @@
-""" Test parse SERP from .html file
-"""
+"""Test parse SERP from .html file"""
import argparse
-import pandas as pd
-import WebSearcher as ws
-pd.set_option('display.width', 120,
- 'display.max_colwidth', 40,
- 'display.max_rows', None,
- 'display.max_columns', None)
+import polars as pl
+
+import WebSearcher as ws
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--filepath", help="The SERP html file")
args = parser.parse_args()
if not args.filepath:
- print('Must include -f arg')
+ print("Must include -f arg")
else:
soup = ws.load_soup(args.filepath)
parsed = ws.parse_serp(soup)
- if parsed:
- results = pd.DataFrame(parsed)
- print(results[['type', 'title', 'url']])
+ if parsed["results"]:
+ df = pl.DataFrame(parsed["results"])
+ print(df.select("type", "title", "url"))
# Obtain HTML component list for examination
cmpts = ws.Extractor(soup).extract_components()
diff --git a/scripts/demo_screenshot.py b/scripts/demo_screenshot.py
index cd2fc30..3604631 100644
--- a/scripts/demo_screenshot.py
+++ b/scripts/demo_screenshot.py
@@ -11,6 +11,7 @@
import tempfile
import typer
+
import WebSearcher as ws
DEFAULT_DATA_DIR = os.path.join("data", f"demo-ws-v{ws.__version__}")
@@ -18,17 +19,17 @@
app = typer.Typer()
TYPE_COLORS = {
- "knowledge": "#4285f4",
- "general": "#34a853",
+ "knowledge": "#4285f4",
+ "general": "#34a853",
"discussions_and_forums": "#fbbc05",
- "perspectives": "#ea4335",
- "top_stories": "#ff6d01",
- "people_also_ask": "#46bdc6",
- "searches_related": "#7b1fa2",
- "locations": "#ff9800",
- "shopping_ads": "#e91e63",
- "unknown": "#d32f2f",
- "ad": "#f44336",
+ "perspectives": "#ea4335",
+ "top_stories": "#ff6d01",
+ "people_also_ask": "#46bdc6",
+ "searches_related": "#7b1fa2",
+ "locations": "#ff9800",
+ "shopping_ads": "#e91e63",
+ "unknown": "#d32f2f",
+ "ad": "#f44336",
}
DEFAULT_COLOR = "#9e9e9e"
diff --git a/scripts/demo_search.py b/scripts/demo_search.py
index 1004ac3..4f019fa 100644
--- a/scripts/demo_search.py
+++ b/scripts/demo_search.py
@@ -1,59 +1,60 @@
-""" Test search and parse a single query from command line
-"""
+"""Test search and parse a single query from command line"""
-import os
+from pathlib import Path
+
+import polars as pl
import typer
-import pandas as pd
-import WebSearcher as ws
-pd.set_option('display.width', 160,
- 'display.max_rows', None,
- 'display.max_columns', None,
- 'display.max_colwidth', 40)
+import WebSearcher as ws
-DEFAULT_DATA_DIR = os.path.join("data", f"demo-ws-v{ws.__version__}")
+DEFAULT_DATA_DIR = Path("data") / f"demo-ws-v{ws.__version__}"
app = typer.Typer()
+
@app.command()
def main(
query: str = typer.Argument("why is the sky blue?", help="Search query to use"),
method: str = typer.Argument("selenium", help="Search method to use: 'selenium' or 'requests'"),
- data_dir: str = typer.Option(DEFAULT_DATA_DIR, help="Prefix for output files"),
+ data_dir: str = typer.Option(str(DEFAULT_DATA_DIR), help="Prefix for output files"),
headless: bool = typer.Option(False, help="Run browser in headless mode"),
use_subprocess: bool = typer.Option(False, help="Run browser in a separate subprocess"),
- version_main: int = typer.Option(144, help="Main version of Chrome to use"),
+ version_main: int = typer.Option(
+ None, help="Main version of Chrome to use (auto-detects if not set)"
+ ),
ai_expand: bool = typer.Option(True, help="Expand AI overviews if present"),
driver_executable_path: str = typer.Option("", help="Path to ChromeDriver executable"),
) -> None:
-
+
# Filepaths
- fps = {k: os.path.join(data_dir, f"{k}.json") for k in ["serps", "parsed", "searches"]}
- os.makedirs(data_dir, exist_ok=True)
- print(f'WebSearcher v{ws.__version__}\nSearch Query: {query}\nOutput Dir: {data_dir}\n')
+ data_path = Path(data_dir)
+ fps = {k: data_path / f"{k}.json" for k in ["serps", "parsed", "searches"]}
+ data_path.mkdir(parents=True, exist_ok=True)
+ print(f"WebSearcher v{ws.__version__}\nSearch Query: {query}\nOutput Dir: {data_dir}\n")
# Setup search engine
se = ws.SearchEngine(
- method=method,
+ method=method,
selenium_config={
"headless": headless,
"use_subprocess": use_subprocess,
"driver_executable_path": driver_executable_path,
"version_main": version_main,
- }
+ },
)
# Search and parse
- se.search(query, ai_expand=ai_expand) # Conduct Search
- se.parse_results() # Parse Results
- se.save_serp(append_to=fps['serps']) # Save SERP to json (html + metadata)
- se.save_search(append_to=fps['searches']) # Save search metadata to json
- se.save_parsed(append_to=fps['parsed']) # Save results/features to json
+ se.search(query, ai_expand=ai_expand) # Conduct Search
+ se.parse_serp() # Parse Results
+ se.save_serp(append_to=fps["serps"]) # Save SERP to json (html + metadata)
+ se.save_search(append_to=fps["searches"]) # Save search metadata to json
+ se.save_parsed(append_to=fps["parsed"]) # Save results/features to json
+
+ # Print select columns
+ if se.parsed.results:
+ df = pl.DataFrame(se.parsed.results)
+ print(df.select("type", "sub_type", "title", "url"))
- # Convert results to dataframe and print select columns
- if se.parsed["results"]:
- results = pd.DataFrame(se.parsed["results"])
- print(results[['type', 'sub_type', 'title', 'url']])
if __name__ == "__main__":
- app()
\ No newline at end of file
+ app()
diff --git a/scripts/demo_search_headers.py b/scripts/demo_search_headers.py
index f3c909b..1f908ea 100644
--- a/scripts/demo_search_headers.py
+++ b/scripts/demo_search_headers.py
@@ -1,43 +1,43 @@
-""" Test search and parse a single query from command line
-"""
+"""Test search and parse a single query from command line"""
-import os
import argparse
-import pandas as pd
-import WebSearcher as ws
+from pathlib import Path
-pd.set_option('display.width', 120,
- 'display.max_colwidth', 40,
- 'display.max_rows', None,
- 'display.max_columns', None)
+import WebSearcher as ws
MODIFIED_HEADERS = {
- 'Host': 'www.google.com',
- 'Referer': 'https://www.google.com/',
- 'Accept': '*/*',
- 'Accept-Encoding': 'gzip,deflate,br',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0',
+ "Host": "www.google.com",
+ "Referer": "https://www.google.com/",
+ "Accept": "*/*",
+ "Accept-Encoding": "gzip,deflate,br",
+ "Accept-Language": "en-US,en;q=0.5",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0",
}
# Settings
parser = argparse.ArgumentParser()
parser.add_argument("-q", "--query", type=str, help="A search query", required=True)
-parser.add_argument("-d", "--data_dir", type=str, help="Directory to save data",
- default=os.path.join("data", f"demo-ws-v{ws.__version__}"))
+parser.add_argument(
+ "-d",
+ "--data_dir",
+ type=str,
+ help="Directory to save data",
+ default=str(Path("data") / f"demo-ws-v{ws.__version__}"),
+)
args = parser.parse_args()
-print(f'WebSearcher v{ws.__version__} | Search Query: {args.query} | Output: {args.data_dir}')
+print(f"WebSearcher v{ws.__version__} | Search Query: {args.query} | Output: {args.data_dir}")
# Filepaths
-fp_serps = os.path.join(args.data_dir, 'serps.json')
-fp_results = os.path.join(args.data_dir, 'results.json')
-dir_html = os.path.join(args.data_dir, 'html')
-os.makedirs(dir_html, exist_ok=True)
+data_path = Path(args.data_dir)
+fp_serps = data_path / "serps.json"
+fp_results = data_path / "results.json"
+dir_html = data_path / "html"
+dir_html.mkdir(parents=True, exist_ok=True)
# Search, parse, and save
se = ws.SearchEngine(headers=MODIFIED_HEADERS) # Initialize searcher
-se.search(args.query) # Conduct Search
-se.parse_results() # Parse Results
-se.save_serp(append_to=fp_serps) # Save SERP to json (html + metadata)
-se.save_results(append_to=fp_results) # Save results to json
-se.save_serp(save_dir=dir_html) # Save SERP html to dir (no metadata)
+se.search(args.query) # Conduct Search
+se.parse_results() # Parse Results
+se.save_serp(append_to=fp_serps) # Save SERP to json (html + metadata)
+se.save_results(append_to=fp_results) # Save results to json
+se.save_serp(save_dir=dir_html) # Save SERP html to dir (no metadata)
diff --git a/scripts/demo_searches.py b/scripts/demo_searches.py
index 17c190a..810e62e 100644
--- a/scripts/demo_searches.py
+++ b/scripts/demo_searches.py
@@ -1,18 +1,15 @@
"""Search and parse queries designed to trigger diverse SERP component types"""
-import os
-import time
import random
+import time
+from pathlib import Path
+
+import polars as pl
import typer
-import pandas as pd
-import WebSearcher as ws
-pd.set_option('display.width', 160)
-pd.set_option('display.max_rows', None)
-pd.set_option('display.max_columns', None)
-pd.set_option('display.max_colwidth', 40)
+import WebSearcher as ws
-DEFAULT_DATA_DIR = os.path.join("data", f"demo-ws-v{ws.__version__}")
+DEFAULT_DATA_DIR = str(Path("data") / f"demo-ws-v{ws.__version__}")
# Queries organized by target component type, 3 per type
# Some queries trigger multiple types (e.g. ads + shopping_ads)
@@ -117,21 +114,25 @@
app = typer.Typer()
+
@app.command()
def main(
method: str = typer.Argument("selenium", help="Search method to use: 'selenium' or 'requests'"),
data_dir: str = typer.Option(DEFAULT_DATA_DIR, help="Prefix for output files"),
headless: bool = typer.Option(False, help="Run browser in headless mode"),
use_subprocess: bool = typer.Option(False, help="Run browser in a separate subprocess"),
- version_main: int = typer.Option(144, help="Main version of Chrome to use"),
+ version_main: int = typer.Option(
+ None, help="Main version of Chrome to use (auto-detects if not set)"
+ ),
ai_expand: bool = typer.Option(True, help="Expand AI overviews if present"),
driver_executable_path: str = typer.Option("", help="Path to ChromeDriver executable"),
types: list[str] = typer.Option([], help="Only run queries for these target types"),
) -> None:
# Filepaths
- fps = {k: os.path.join(data_dir, f"{k}.json") for k in ["serps", "parsed", "searches"]}
- os.makedirs(data_dir, exist_ok=True)
+ data_path = Path(data_dir)
+ fps = {k: data_path / f"{k}.json" for k in ["serps", "parsed", "searches"]}
+ data_path.mkdir(parents=True, exist_ok=True)
# Filter queries by type if specified
if types:
@@ -149,33 +150,32 @@ def main(
"use_subprocess": use_subprocess,
"driver_executable_path": driver_executable_path,
"version_main": version_main,
- }
+ },
)
for i, qry in enumerate(queries):
-
# Search, parse, and save
- se.search(qry, ai_expand=ai_expand) # Conduct Search
- se.parse_serp() # Parse Results
- se.save_serp(append_to=fps['serps']) # Save SERP to json (html + metadata)
- se.save_search(append_to=fps['searches']) # Save search to json (metadata only)
- se.save_parsed(append_to=fps['parsed']) # Save parsed results and SERP features to json
+ se.search(qry, ai_expand=ai_expand) # Conduct Search
+ se.parse_serp() # Parse Results
+ se.save_serp(append_to=fps["serps"]) # Save SERP to json (html + metadata)
+ se.save_search(append_to=fps["searches"]) # Save search to json (metadata only)
+ se.save_parsed(append_to=fps["parsed"]) # Save parsed results and SERP features to json
# Check for CAPTCHA — retry once after waiting
- if se.parsed.get('features', {}).get('captcha'):
- print(f"\n[{i+1}/{len(queries)}] CAPTCHA detected for '{qry}', waiting 5 min...")
+ if se.parsed.features.get("captcha"):
+ print(f"\n[{i + 1}/{len(queries)}] CAPTCHA detected for '{qry}', waiting 5 min...")
time.sleep(300)
se.search(qry, ai_expand=ai_expand)
se.parse_serp()
- if se.parsed.get('features', {}).get('captcha'):
- print(f"CAPTCHA still present, stopping.")
+ if se.parsed.features.get("captcha"):
+ print("CAPTCHA still present, stopping.")
break
- # Convert results to dataframe and print select columns
- if se.parsed["results"]:
- results = pd.DataFrame(se.parsed["results"])
- print(f"\n[{i+1}/{len(queries)}] {qry}")
- print(results[['type', 'sub_type', 'title', 'url']])
+ # Print select columns
+ if se.parsed.results:
+ df = pl.DataFrame(se.parsed.results)
+ print(f"\n[{i + 1}/{len(queries)}] {qry}")
+ print(df.select("type", "sub_type", "title", "url"))
if i < len(queries) - 1:
time.sleep(30 + random.uniform(0, 5))
diff --git a/scripts/parsed_to_csv.py b/scripts/parsed_to_csv.py
index d6e5f74..1b52228 100644
--- a/scripts/parsed_to_csv.py
+++ b/scripts/parsed_to_csv.py
@@ -35,8 +35,9 @@ def read_parsed_jsonl(filepath: str) -> pl.DataFrame:
def main():
- parser = argparse.ArgumentParser(description=__doc__,
- formatter_class=argparse.RawDescriptionHelpFormatter)
+ parser = argparse.ArgumentParser(
+ description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+ )
parser.add_argument("filepath", help="Path to parsed.json file")
parser.add_argument("-o", "--output", help="Output CSV path (default: /results.csv)")
args = parser.parse_args()
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[01f85d1329ba].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[01f85d1329ba].json
index 18c3c26..06dd6ee 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[01f85d1329ba].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[01f85d1329ba].json
@@ -126,12 +126,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Northern Lights | When and where to see the Aurora Borealis Discover the World https://www.discover-the-world.com › northern-lights Discover the World https://www.discover-the-world.com › northern-lights",
- "What time are the northern lights best tonight?",
- "What causes the northern lights?",
- "The best place to see the Northern Lights | The Aurora Zone Aurora Zone https://theaurorazone.com › about-the-aurora › the-best-... Aurora Zone https://theaurorazone.com › about-the-aurora › the-best-..."
- ],
+ "details": {
+ "items": [
+ "Northern Lights | When and where to see the Aurora Borealis Discover the World https://www.discover-the-world.com › northern-lights Discover the World https://www.discover-the-world.com › northern-lights",
+ "What time are the northern lights best tonight?",
+ "What causes the northern lights?",
+ "The best place to see the Northern Lights | The Aurora Zone Aurora Zone https://theaurorazone.com › about-the-aurora › the-best-... Aurora Zone https://theaurorazone.com › about-the-aurora › the-best-..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 8,
@@ -791,6 +794,7 @@
"cmpt_rank": 12,
"details": {
"directions": "/maps/dir//Northern+Light+School,+3710+Dorisa+Ave,+Oakland,+CA+94605/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x56c896e8a09c2e03:0x214f4f09afa434a7?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://www.northernlightschool.com/"
},
"error": null,
@@ -808,6 +812,7 @@
"cmpt_rank": 12,
"details": {
"directions": "/maps/dir//North+Light,+4915+Telegraph+Ave,+Oakland,+CA+94609/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x80857d885c87df5b:0x23d84cf6bd07309?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://northlight.bar/"
},
"error": null,
@@ -825,6 +830,7 @@
"cmpt_rank": 12,
"details": {
"directions": "/maps/dir//Northern+Light+Venture+Capital,+2744+Sand+Hill+Rd+Suite+100,+Menlo+Park,+CA+94025/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fa4e3ed7ae611:0x61fcdeda34ed3060?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://www.nlvc.com/"
},
"error": null,
@@ -840,7 +846,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 58,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[032572e185d3].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[032572e185d3].json
index d0f71d7..2486efd 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[032572e185d3].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[032572e185d3].json
@@ -17,107 +17,74 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=4IWFaar5B6mWwbkPmI2oiQE&ved=2ahUKEwi1uKmwmsSSAxW2QjABHazhBQcQgK4QegYIAQgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue#:~:text=Gases%20and%20particles%20in%20Earth's%20atmosphere%20scatter,a%20blue%20sky%20most%20of%20the%20time."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://morgridge.org/blue-sky/why-is-the-sky-blue/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/askscience/comments/14566ig/why_is_the_sky_blue_do_i_understand_it_correctly/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html#:~:text=We%20have%20three%20types%20of%20colour%20receptors%2C,visual%20system%20constructs%20the%20colours%20we%20see."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://kids.nationalgeographic.com/books/article/sky"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.uu.edu/dept/physics/scienceguys/2000Oct.cfm"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/story/why-is-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.montrealsciencecentre.com/blog/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://plus.nasa.gov/video/space-place-in-a-snap-why-is-the-sky-blue-2/#:~:text=When%20sunlight%20enters%20Earth's%20atmosphere%2C%20it%20encounters,sky%20appears%20blue%20on%20a%20sunny%20day."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://plus.nasa.gov/video/space-place-in-a-snap-why-is-the-sky-blue-2/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -135,12 +102,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "What is the true color of our sky?",
- "How to explain to a kid why the sky is blue?",
- "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "What is the true color of our sky?",
+ "How to explain to a kid why the sky is blue?",
+ "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -394,7 +364,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -411,7 +382,8 @@
"cmpt_rank": 9,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -454,7 +426,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 23,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[0d3fc3b49b76].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[0d3fc3b49b76].json
index d564545..48e009a 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[0d3fc3b49b76].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[0d3fc3b49b76].json
@@ -182,7 +182,7 @@
{
"cite": null,
"cmpt_rank": 10,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 12,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[0ed311025efc].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[0ed311025efc].json
index 78a2009..3271f98 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[0ed311025efc].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[0ed311025efc].json
@@ -238,12 +238,15 @@
{
"cite": null,
"cmpt_rank": 4,
- "details": [
- "Is NY Times liberal or conservative?",
- "Why did Bari Weiss leave the NYT?",
- "What is Trump's favorite newspaper?",
- "Submit a Letter to The Editor - The New York Times Help Center NYT's Help Center - The New York Times https://help.nytimes.com › en-us › articles › 1150149252... NYT's Help Center - The New York Times https://help.nytimes.com › en-us › articles › 1150149252..."
- ],
+ "details": {
+ "items": [
+ "Is NY Times liberal or conservative?",
+ "Why did Bari Weiss leave the NYT?",
+ "What is Trump's favorite newspaper?",
+ "Submit a Letter to The Editor - The New York Times Help Center NYT's Help Center - The New York Times https://help.nytimes.com › en-us › articles › 1150149252... NYT's Help Center - The New York Times https://help.nytimes.com › en-us › articles › 1150149252..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 16,
@@ -411,7 +414,7 @@
{
"cite": null,
"cmpt_rank": 10,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 28,
@@ -426,7 +429,8 @@
"cite": null,
"cmpt_rank": 11,
"details": {
- "subtitle": "Newspaper"
+ "subtitle": "Newspaper",
+ "type": "panel"
},
"error": null,
"section": "rhs",
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[130eba186e94].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[130eba186e94].json
index 505255b..087c43b 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[130eba186e94].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[130eba186e94].json
@@ -31,95 +31,66 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "AlphaFold2",
- "title": "",
"url": "https://www.nature.com/articles/s41586-021-03819-2"
},
{
- "misc": {},
"text": "RoseTTAFold",
- "title": "",
"url": "https://www.science.org/doi/10.1126/science.abj8754"
},
{
- "misc": {},
"text": "Quanta Magazine|YouTube • Feb 7, 2025",
- "title": "",
"url": "https://www.youtube.com/shorts/6RSxZxiUix4"
},
{
- "misc": {},
"text": "MapDiff",
- "title": "",
"url": "https://phys.org/news/2025-06-machine-method-accuracy-inverse-protein.html"
},
{
- "misc": {},
"text": "hybrid quantum algorithms like QFold",
- "title": "",
"url": "https://ui.adsabs.harvard.edu/abs/2022APS..MARG38009C/abstract"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://pubmed.ncbi.nlm.nih.gov/40505455/#:~:text=Abstract,novel%20proteins%20for%20biotechnological%20applications."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://newsroom.uw.edu/news-releases/beyond-alphafold-ai-excels-creating-new-proteins#:~:text=Recently%2C%20powerful%20machine%20learning%20algorithms%20including%20AlphaFold,based%20solely%20on%20their%20amino%20acid%20sequences."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.quantamagazine.org/how-ai-revolutionized-protein-science-but-didnt-end-it-20240626/#:~:text=That%20week%2C%20a%20relative%20newcomer,of%20people%20were%20in%20denial.%E2%80%9D"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://theconversation.com/machine-learning-cracked-the-protein-folding-problem-and-won-the-2024-nobel-prize-in-chemistry-240937#:~:text=Proteins%20are%20the%20molecular%20machines%20of%20life.,*%20Alzheimer's%20*%20Cystic%20fibrosis%20*%20Diabetes"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.sciencedirect.com/science/article/pii/S0959440X19301447#:~:text=Many%20aspects%20of%20the%20study,become%20mainstream%20in%20protein%20simulation."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://pubmed.ncbi.nlm.nih.gov/31881449/#:~:text=Machine%20learning%20has%20had%20a%20significant%20impact,folding%20and%20dynamics%20in%20the%20near%20future."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.sciencedirect.com/science/article/abs/pii/S0959440X19301447"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://phys.org/news/2025-06-machine-method-accuracy-inverse-protein.html#:~:text=It%20also%20complements%20other%20recent,advancing%20next%2Dgeneration%20therapeutics.%22"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -389,7 +360,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 20,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[18eccfe8454e].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[18eccfe8454e].json
index 85532de..3ff6abd 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[18eccfe8454e].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[18eccfe8454e].json
@@ -28,12 +28,14 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": {},
+ "details": {
+ "type": "ratings"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
"sub_rank": 0,
- "sub_type": "results_for__austin,_tx",
+ "sub_type": "results_for",
"text": "Corner Restaurant<|>4.8<|>(17K)<|> · <|>$$<|> · Restaurant<|>110 E 2nd St<|> <|> <|> <|> <|> <|> <|>\"The wagyu burger was absolutely delicious as were the pollo asada tacos.\"",
"title": "Corner Restaurant",
"type": "local_results",
@@ -42,12 +44,14 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": {},
+ "details": {
+ "type": "ratings"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
"sub_rank": 1,
- "sub_type": "results_for__austin,_tx",
+ "sub_type": "results_for",
"text": "Caroline<|>4.8<|>(14K)<|> · <|>$20–30<|> · American<|>621 Congress Ave. Suite 101<|> <|> <|> <|> <|> <|> <|>\"The <|>food<|> was great, the drinks were great and our server, Aaron was AMAZING!\"",
"title": "Caroline",
"type": "local_results",
@@ -56,12 +60,14 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": {},
+ "details": {
+ "type": "ratings"
+ },
"error": null,
"section": "main",
"serp_rank": 3,
"sub_rank": 2,
- "sub_type": "results_for__austin,_tx",
+ "sub_type": "results_for",
"text": "Corinne Austin<|>4.8<|>(5.6K)<|> · <|>$$<|> · Restaurant<|>304 E Cesar Chavez St<|> <|> <|> <|> <|> <|> <|>\"The wagyu burger was cooked perfectly and the desserts were one of a kind!\"",
"title": "Corinne Austin",
"type": "local_results",
@@ -70,12 +76,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "What is the most \"Austin\" of Austin Restaurants? : r/austinfood - Reddit Reddit https://www.reddit.com › austinfood › comments › what... Reddit https://www.reddit.com › austinfood › comments › what...",
- "What food is Austin, Texas best known for?",
- "Is $100 a good salary in Austin, Texas?",
- "THE 10 BEST Dinner Restaurants in Austin (UPDATED 2026) - Tripadvisor Tripadvisor https://www.tripadvisor.com › Restaurants-g30196-zfp58... Tripadvisor https://www.tripadvisor.com › Restaurants-g30196-zfp58..."
- ],
+ "details": {
+ "items": [
+ "What is the most \"Austin\" of Austin Restaurants? : r/austinfood - Reddit Reddit https://www.reddit.com › austinfood › comments › what... Reddit https://www.reddit.com › austinfood › comments › what...",
+ "What food is Austin, Texas best known for?",
+ "Is $100 a good salary in Austin, Texas?",
+ "THE 10 BEST Dinner Restaurants in Austin (UPDATED 2026) - Tripadvisor Tripadvisor https://www.tripadvisor.com › Restaurants-g30196-zfp58... Tripadvisor https://www.tripadvisor.com › Restaurants-g30196-zfp58..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 4,
@@ -453,14 +462,17 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [
- "LENOIR",
- "Emmer & Rye",
- "Olamaie",
- "Odd Duck",
- "Lutie's",
- "Comedor"
- ],
+ "details": {
+ "items": [
+ "LENOIR",
+ "Emmer & Rye",
+ "Olamaie",
+ "Odd Duck",
+ "Lutie's",
+ "Comedor"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 31,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[2c0aa0bbcd0c].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[2c0aa0bbcd0c].json
index bede06e..d6697dc 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[2c0aa0bbcd0c].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[2c0aa0bbcd0c].json
@@ -44,7 +44,8 @@
"cmpt_rank": 2,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -59,12 +60,15 @@
{
"cite": null,
"cmpt_rank": 3,
- "details": [
- "How should a beginner start yoga?",
- "Can I lose belly fat by doing yoga?",
- "Which type of yoga is best for beginners?",
- "7 Essential Yoga Tips for Beginners and Common Mistakes to Avoid Body Flows Yoga Retreats https://bodyflows.com › articles › yoga-for-beginners-7-t... Body Flows Yoga Retreats https://bodyflows.com › articles › yoga-for-beginners-7-t..."
- ],
+ "details": {
+ "items": [
+ "How should a beginner start yoga?",
+ "Can I lose belly fat by doing yoga?",
+ "Which type of yoga is best for beginners?",
+ "7 Essential Yoga Tips for Beginners and Common Mistakes to Avoid Body Flows Yoga Retreats https://bodyflows.com › articles › yoga-for-beginners-7-t... Body Flows Yoga Retreats https://bodyflows.com › articles › yoga-for-beginners-7-t..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 3,
@@ -96,41 +100,30 @@
"heading": "Find related products & services",
"img_url": null,
"text": "Find related products & services|About the source|About the source|About the source|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These searches help you find relevant offers from advertisers",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Yoga for Beginners |with Music",
- "title": "",
"url": "http://www.google.com/search?q=Yoga+for+Beginners+with+Music&mrq=1&mrqri=0&mrqei=iIOGacWOObeFxc8PiJ-3kQw&sa=X&ved=2ahUKEwjF8sCkjMaSAxW3QvEDHYjPLcIQyO0OKAB6BAhxEAU"
},
{
- "misc": {},
"text": "Yoga for beginners |app free",
- "title": "",
"url": "http://www.google.com/search?q=Yoga+for+beginners+app+free&mrq=1&mrqri=1&mrqei=iIOGacWOObeFxc8PiJ-3kQw&sa=X&ved=2ahUKEwjF8sCkjMaSAxW3QvEDHYjPLcIQyO0OKAF6BAhxEAY"
},
{
- "misc": {},
"text": "Yoga for beginners |at home men",
- "title": "",
"url": "http://www.google.com/search?q=Yoga+for+beginners+at+home+men&mrq=1&mrqri=2&mrqei=iIOGacWOObeFxc8PiJ-3kQw&sa=X&ved=2ahUKEwjF8sCkjMaSAxW3QvEDHYjPLcIQyO0OKAJ6BAhxEAc"
},
{
- "misc": {},
"text": "Yoga for Beginners |At home free",
- "title": "",
"url": "http://www.google.com/search?q=Yoga+for+Beginners+At+home+free&mrq=1&mrqri=3&mrqei=iIOGacWOObeFxc8PiJ-3kQw&sa=X&ved=2ahUKEwjF8sCkjMaSAxW3QvEDHYjPLcIQyO0OKAN6BAhxEAg"
},
{
- "misc": {},
"text": "Yoga for beginners |near me| for |seniors",
- "title": "",
"url": "http://www.google.com/search?q=Yoga+for+beginners+near+me+for+seniors&mrq=1&mrqri=4&mrqei=iIOGacWOObeFxc8PiJ-3kQw&sa=X&ved=2ahUKEwjF8sCkjMaSAxW3QvEDHYjPLcIQyO0OKAR6BAhxEAk"
},
{
- "misc": {},
"text": "Yoga for beginners |near me",
- "title": "",
"url": "http://www.google.com/search?q=Yoga+for+beginners+near+me&mrq=1&mrqri=5&mrqei=iIOGacWOObeFxc8PiJ-3kQw&sa=X&ved=2ahUKEwjF8sCkjMaSAxW3QvEDHYjPLcIQyO0OKAV6BAhxEAo"
}
]
@@ -318,7 +311,8 @@
"cmpt_rank": 7,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -335,6 +329,7 @@
"cmpt_rank": 8,
"details": {
"directions": "/maps/dir//YogaSix+Palo+Alto,+240+Stanford+Shopping+Center,+Palo+Alto,+CA+94304/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fbb2e2a95157b:0x9d9f771f7b7742bf?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://www.yogasix.com/palo-alto"
},
"error": null,
@@ -353,6 +348,7 @@
"details": {
"directions": "/maps/dir//YogaSource,+158+Hamilton+Ave,+Palo+Alto,+CA+94301/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fbb3a3ff64567:0xb3bebd7e0ef7474d?sa=X&ved=1t:57443&ictx=111",
"schedule": "https://www.mindbodyonline.com/explore/locations/yogasource-palo-alto?hl=en-US&gei=iIOGacWOObeFxc8PiJ-3kQw&rwg_token=AFd1xnEdEpzcvpFj0nEOdY2PrCMkUSsEMJjPmGUqyCHZrSoH369qpKMz9sRtsQijKuuhZGSSy24dEcZL2s187bkygs_Abf-j7w%3D%3D&source=cat",
+ "type": "ratings",
"website": "http://yogasource.com/"
},
"error": null,
@@ -370,6 +366,7 @@
"cmpt_rank": 8,
"details": {
"directions": "/maps/dir//Yogarok,+1100+Chestnut+St+Suite+718,+Menlo+Park,+CA+94025/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fa57c9f732d11:0xcdc92ba5a53d0f9?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "https://yogarok.com/"
},
"error": null,
@@ -401,7 +398,8 @@
"cmpt_rank": 10,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -418,7 +416,8 @@
"cmpt_rank": 11,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -435,7 +434,8 @@
"cmpt_rank": 12,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -464,7 +464,7 @@
{
"cite": null,
"cmpt_rank": 14,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 27,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[2d1b05a046b2].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[2d1b05a046b2].json
index 51cf204..e477d25 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[2d1b05a046b2].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[2d1b05a046b2].json
@@ -154,7 +154,7 @@
{
"cite": null,
"cmpt_rank": 10,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 10,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[305b53af69be].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[305b53af69be].json
index 3240b3e..8577d7b 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[305b53af69be].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[305b53af69be].json
@@ -17,35 +17,26 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=bd16726c6a722966&q=donald+trump&udm=2&source=univ&sa=X&ved=2ahUKEwio9aONqcSSAxUNlmoFHbiZI1cQnN8JegQIHBAD"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.instagram.com/reel/DUWrLYEjNWf/"
},
{
- "misc": {},
"text": "Age|79 years|Jun 14, 1946",
- "title": "",
"url": "/search?sca_esv=bd16726c6a722966&q=donald+trump+age&stick=H4sIAAAAAAAAAOPgE-LUz9U3SC4ssTTQEs1OttIvSM0vyEkFUkXF-XlWiempi1gFUvLzEnNSFEqKSnMLFIBCAMqYnZc3AAAA&sa=X&ved=2ahUKEwio9aONqcSSAxUNlmoFHbiZI1cQ18AJegQIJRAB"
},
{
- "misc": {},
"text": "Party|Republican Party",
- "title": "",
"url": "/search?sca_esv=bd16726c6a722966&q=Republican+Party&si=AL3DRZHmwLjWhgnaPB3UTu10R6S5qNLXiQiKMeezfKyB1FMsRrUdcsLX7-nPSpyKZYIqfOJDBGOdz2E57-99eIANe1Pl8wwO7G3XqVt3qiEO4uERO4KYYXvGXEXRT6jOHlGcixAxcLt6zGSb8XfSKPYd5JeDPjiWx1lTb4E4mlK9t2l0ky4-1M_qoJaYHMURBGCmYARFqYnA&sa=X&ved=2ahUKEwio9aONqcSSAxUNlmoFHbiZI1cQ18AJegQIJBAB"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.instagram.com/p/DUYFnxqEZ16/"
}
]
@@ -581,7 +572,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 38,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[30c5d6bdb650].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[30c5d6bdb650].json
index 5e15019..0c452f3 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[30c5d6bdb650].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[30c5d6bdb650].json
@@ -17,89 +17,62 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "1m|Bloomberg Podcasts|YouTube • Feb 5, 2026",
- "title": "",
"url": "https://www.youtube.com/watch?v=XIMupMYgR5s&t=138"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reuters.com/technology/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://finance.yahoo.com/topic/tech/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.technologyreview.com/#:~:text=Commercial%20space%20stations:%2010%20Breakthrough%20Technologies%202026,outpost%20is%20scheduled%20to%20launch%20this%20May."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.technewsworld.com/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://news.mit.edu/#:~:text=Terahertz%20microscope%20reveals%20the%20motion%20of%20superconducting,observe%20terahertz%20%E2%80%9Cjiggles%E2%80%9D%20in%20a%20superconducting%20fluid."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nytimes.com/section/technology"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://indianexpress.com/section/technology/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.washingtonpost.com/business/technology/#:~:text=Young%20people%20in%20China%20have%20a%20new,and%20a%20digital%20ear%20for%20tech%20giants."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.washingtonpost.com/business/technology/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.technologyreview.com/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://news.mit.edu/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -327,12 +300,15 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [
- "What is the newest news about technology?",
- "Which technology is trending now?",
- "Chapter 14: Current Issues in Technology Open Washington Pressbooks https://openwa.pressbooks.pub › bustechessentials › chap... Open Washington Pressbooks https://openwa.pressbooks.pub › bustechessentials › chap...",
- "What is the best site for tech news?"
- ],
+ "details": {
+ "items": [
+ "What is the newest news about technology?",
+ "Which technology is trending now?",
+ "Chapter 14: Current Issues in Technology Open Washington Pressbooks https://openwa.pressbooks.pub › bustechessentials › chap... Open Washington Pressbooks https://openwa.pressbooks.pub › bustechessentials › chap...",
+ "What is the best site for tech news?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 16,
@@ -346,44 +322,35 @@
{
"cite": "https://www.fairobserver.com › science › tech",
"cmpt_rank": 12,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Latest science news",
- "url": "https://www.fairobserver.com/category/more/science/"
- },
- {
- "misc": {},
- "text": "",
- "title": "World News",
- "url": "https://www.fairobserver.com/category/world-news/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Science news 2024",
- "url": "/aclk?sa=L&ai=DChsSEwiBv9rKi8aSAxWCs4MHHTvOD3wYACICCAIQBxoCZWY&co=1&ase=2&gclid=EAIaIQobChMIgb_ayovGkgMVgrODBx07zg98EAMYASAHEgLrZPD_BwE&cid=CAASugHkaM6_nR2Y1-WNgAybVtKDkcdFam_-897gUEHlM1e3HFA1JEIRR6eIREBbIBig14-2q9GWztSURpc4PLuCZwZE7vTT5CYxJTXnG_PD7k2ZFBsPOuqoQrI5CLpqhsyN3xKtvjDJVrJ4jrDA_uSm6Xv9z80g82jndYYrV4MyhMgG6Y-37OV6WcoBvcdlJhgB560UAPfwyn8GUwZPnLQmn1yjEcHguG75f7hMKVjctY579I_Px1I9EpL1M9c&cce=2&category=acrcp_v1_32&sig=AOD64_1LICCufg99l0FEtMwusdarVTkFIg&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "India News",
- "url": "https://www.fairobserver.com/category/world-news/india-news/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Economics News",
- "url": "https://www.fairobserver.com/category/economics/"
- },
- {
- "misc": {},
- "text": "",
- "title": "All Events",
- "url": "https://www.fairobserver.com/events/"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Latest science news",
+ "url": "https://www.fairobserver.com/category/more/science/"
+ },
+ {
+ "title": "World News",
+ "url": "https://www.fairobserver.com/category/world-news/"
+ },
+ {
+ "title": "Science news 2024",
+ "url": "/aclk?sa=L&ai=DChsSEwiBv9rKi8aSAxWCs4MHHTvOD3wYACICCAIQBxoCZWY&co=1&ase=2&gclid=EAIaIQobChMIgb_ayovGkgMVgrODBx07zg98EAMYASAHEgLrZPD_BwE&cid=CAASugHkaM6_nR2Y1-WNgAybVtKDkcdFam_-897gUEHlM1e3HFA1JEIRR6eIREBbIBig14-2q9GWztSURpc4PLuCZwZE7vTT5CYxJTXnG_PD7k2ZFBsPOuqoQrI5CLpqhsyN3xKtvjDJVrJ4jrDA_uSm6Xv9z80g82jndYYrV4MyhMgG6Y-37OV6WcoBvcdlJhgB560UAPfwyn8GUwZPnLQmn1yjEcHguG75f7hMKVjctY579I_Px1I9EpL1M9c&cce=2&category=acrcp_v1_32&sig=AOD64_1LICCufg99l0FEtMwusdarVTkFIg&adurl=&q="
+ },
+ {
+ "title": "India News",
+ "url": "https://www.fairobserver.com/category/world-news/india-news/"
+ },
+ {
+ "title": "Economics News",
+ "url": "https://www.fairobserver.com/category/economics/"
+ },
+ {
+ "title": "All Events",
+ "url": "https://www.fairobserver.com/events/"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 17,
@@ -397,7 +364,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 18,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[39617f527744].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[39617f527744].json
index e65a273..54e3aae 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[39617f527744].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[39617f527744].json
@@ -112,12 +112,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "World News | Latest Top Stories - Reuters Reuters https://www.reuters.com › world Reuters https://www.reuters.com › world",
- "Latest News Today: Breaking News and Top Headlines from India ... indianexpress.com https://indianexpress.com indianexpress.com https://indianexpress.com",
- "What are current issues in the world right now?",
- "Top & Breaking World News Today - AP News AP News https://apnews.com › world-news AP News https://apnews.com › world-news"
- ],
+ "details": {
+ "items": [
+ "World News | Latest Top Stories - Reuters Reuters https://www.reuters.com › world Reuters https://www.reuters.com › world",
+ "Latest News Today: Breaking News and Top Headlines from India ... indianexpress.com https://indianexpress.com indianexpress.com https://indianexpress.com",
+ "What are current issues in the world right now?",
+ "Top & Breaking World News Today - AP News AP News https://apnews.com › world-news AP News https://apnews.com › world-news"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 7,
@@ -271,44 +274,35 @@
{
"cite": "https://www.democracynow.org",
"cmpt_rank": 11,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Headlines",
- "url": "https://www.democracynow.org/headlines"
- },
- {
- "misc": {},
- "text": "",
- "title": "Global News and Analysis",
- "url": "https://www.democracynow.org/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Climate Crisis",
- "url": "https://www.democracynow.org/topics/climate_change"
- },
- {
- "misc": {},
- "text": "",
- "title": "Topics In The News",
- "url": "https://www.democracynow.org/topics"
- },
- {
- "misc": {},
- "text": "",
- "title": "Watch Live",
- "url": "https://www.democracynow.org/shows"
- },
- {
- "misc": {},
- "text": "",
- "title": "Columns",
- "url": "https://www.democracynow.org/categories/weekly_column"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Headlines",
+ "url": "https://www.democracynow.org/headlines"
+ },
+ {
+ "title": "Global News and Analysis",
+ "url": "https://www.democracynow.org/"
+ },
+ {
+ "title": "Climate Crisis",
+ "url": "https://www.democracynow.org/topics/climate_change"
+ },
+ {
+ "title": "Topics In The News",
+ "url": "https://www.democracynow.org/topics"
+ },
+ {
+ "title": "Watch Live",
+ "url": "https://www.democracynow.org/shows"
+ },
+ {
+ "title": "Columns",
+ "url": "https://www.democracynow.org/categories/weekly_column"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 18,
@@ -322,7 +316,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 19,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[3c03a4a2cb7c].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[3c03a4a2cb7c].json
index 48b81eb..398be7e 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[3c03a4a2cb7c].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[3c03a4a2cb7c].json
@@ -280,7 +280,7 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 19,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[3c09a0f0c92f].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[3c09a0f0c92f].json
index 8f9fa81..a9ba363 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[3c09a0f0c92f].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[3c09a0f0c92f].json
@@ -17,125 +17,86 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://oversight.house.gov/release/hearing-wrap-up-americans-deserve-improved-vaccine-injury-and-compensation-systems/#:~:text=WASHINGTON%20%E2%80%94%20The%20Select%20Subcommittee%20on,a%20future%20public%20health%20crisis."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.congress.gov/event/119th-congress/senate-event/337242#:~:text=Hearings%20to%20examine%20voices%20of,Congress.gov%20%7C%20Library%20of%20Congress"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.sciencedirect.com/science/article/pii/S0264410X22010283#:~:text=Pfizer%20and%20Moderna%20mRNA%20COVID%2D19%20vaccines%20were%20associated%20with,of%20serious%20COVID%2D19%20outcomes."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nejm.org/doi/full/10.1056/NEJMsa2514268#:~:text=The%20RSVpreF%20vaccine%20was%20associated,and%20the%20Alumbra%20Innovations%20Foundation.)"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://publichealth.jhu.edu/2025/virus-transmission-trends-winter-2025-26#:~:text=Is%20this%20year's%20flu%20vaccine,%2Drelated%20hospitalizations%20this%20season.%E2%80%9D"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.congress.gov/118/meeting/house/117004/documents/HHRG-118-VC00-Transcript-20240321.pdf"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.hrsa.gov/vaccine-compensation#:~:text=How%20does%20the%20VICP%20work,and%20individuals%20who%20are%20deceased."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.uchealth.org/today/everything-you-need-to-know-about-the-2025-26-covid-19-vaccine-and-flu-shots/#:~:text=To%20answer%20some%20of%20your,the%20late%20fall%20and%20winter."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.congress.gov/event/119th-congress/senate-event/336985#:~:text=Senate%20Event%20336985-,Hearings%20to%20examine%20science%20and%20Federal%20health%20agencies%2C%20focusing%20on,119th%20Congress%20(2025%2D2026)"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://biontechse.gcs-web.com/news-releases/news-release-details/pfizer-and-biontech-publish-preclinical-data-investigational/#:~:text=In%20clinical%20studies%2C%20adverse%20reactions,to%20complete%20the%20vaccination%20series."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cdc.gov/acip/downloads/slides-2025-09-18-19/10-levi-COVID-508.pdf"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cidrap.umn.edu/influenza-vaccines/current-flu-vaccine-provides-moderate-protection-against-severe-disease-interim#:~:text=Two%20new%20analyses%2C%20one%20from,in%20the%202024%E2%80%9325%20season."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://cardiovascularbusiness.com/topics/clinical/covid-19/cardiologists-false-claims-used-promote-fake-covid-19-vaccine-recall-fact#:~:text=Cardiologist%20Peter%20McCullough%20no%20stranger,his%20claims%20as%20%22misleading.%22"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cidrap.umn.edu/covid-19/report-spotlights-52-us-doctors-who-posted-potentially-harmful-covid-misinformation-online#:~:text=Major%20themes%20were%20disputing%20COVID,inflammation%20of%20the%20heart%20muscle)."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.healthychildren.org/English/safety-prevention/immunizations/Pages/vaccine-studies-examine-the-evidence.aspx#:~:text=Research%20continues%20to%20confirm%20that,and%20teens%20from%20serious%20diseases."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=aafUmBFS42Q"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nbcnews.com/think/opinion/covid-vaccine-treatment-misinformation-medical-malpractice-it-should-be-punished-ncna1287180#:~:text=But%20a%20vocal%20minority%20of,to%20hold%20these%20doctors%20accountable."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=oZ1hgVNRMmE"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -167,12 +128,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Why are people refusing the COVID vaccine?",
- "Stanford Medicine study shows why mRNA COVID-19 vaccine can ... Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid",
- "What percentage of people have a serious side effect from the COVID vaccine?",
- "Adverse Effects of Pfizer (BioNTech), Oxford-AstraZeneca (ChAdOx1 ... PMC - NIH https://pmc.ncbi.nlm.nih.gov › articles › PMC9967558 PMC - NIH https://pmc.ncbi.nlm.nih.gov › articles › PMC9967558"
- ],
+ "details": {
+ "items": [
+ "Why are people refusing the COVID vaccine?",
+ "Stanford Medicine study shows why mRNA COVID-19 vaccine can ... Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid",
+ "What percentage of people have a serious side effect from the COVID vaccine?",
+ "Adverse Effects of Pfizer (BioNTech), Oxford-AstraZeneca (ChAdOx1 ... PMC - NIH https://pmc.ncbi.nlm.nih.gov › articles › PMC9967558 PMC - NIH https://pmc.ncbi.nlm.nih.gov › articles › PMC9967558"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[3f5efb1dc358].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[3f5efb1dc358].json
index 4751472..5314953 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[3f5efb1dc358].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[3f5efb1dc358].json
@@ -17,119 +17,82 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Tectonic Plate Movement",
- "title": "",
"url": "https://www.google.com/search?q=Tectonic+Plate+Movement&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgAEAo"
},
{
- "misc": {},
"text": "Faults and Stress",
- "title": "",
"url": "https://www.google.com/search?q=Faults+and+Stress&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgAEAw"
},
{
- "misc": {},
"text": "Elastic Rebound Theory",
- "title": "",
"url": "https://www.google.com/search?q=Elastic+Rebound+Theory&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgBEAE"
},
{
- "misc": {},
"text": "Location of Release",
- "title": "",
"url": "https://www.google.com/search?q=Location+of+Release&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgCEAE"
},
{
- "misc": {},
"text": "Other Causes",
- "title": "",
"url": "https://www.google.com/search?q=Other+Causes&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgDEAE"
},
{
- "misc": {},
"text": "Transform Boundaries",
- "title": "",
"url": "https://www.google.com/search?q=Transform+Boundaries&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgFEAE"
},
{
- "misc": {},
"text": "Convergent Boundaries",
- "title": "",
"url": "https://www.google.com/search?q=Convergent+Boundaries&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgFEAM"
},
{
- "misc": {},
"text": "Divergent Boundaries",
- "title": "",
"url": "https://www.google.com/search?q=Divergent+Boundaries&sei=Xn6GaYH8D86yqwHB5YQY&ved=2ahUKEwiGnv2th8aSAxVmCBAIHRcaAXcQgK4QegYIAQgFEAU"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.usgs.gov/faqs/what-earthquake-and-what-causes-them-happen"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.mtu.edu/geo/community/seismology/learn/earthquake-cause/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://mauiready.wordpress.com/be-informed-2/earthquakes/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.usgs.gov/programs/earthquake-hazards/science-earthquakes"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://study.com/learn/lesson/video/earthquake-causes-effects-locations.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/science/earthquake-geology"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.earthquakescanada.nrcan.gc.ca/info-gen/faq-en.php"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "http://www.seismo.ethz.ch/en/knowledge/faq/what-causes-earthquakes/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.bbc.co.uk/bitesize/guides/zcv7hyc/revision/3"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -161,12 +124,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Why Do Earthquakes Happen? Michigan Technological University https://www.mtu.edu › geo › learn › earthquake-cause Michigan Technological University https://www.mtu.edu › geo › learn › earthquake-cause",
- "What is a 7 letter word for earthquake?",
- "Preparing for an earthquake - Toowoomba Regional Council Toowoomba Regional Council https://www.tr.qld.gov.au › be-prepared-for-emergencies Toowoomba Regional Council https://www.tr.qld.gov.au › be-prepared-for-emergencies",
- "Is a 4.8 earthquake bad?"
- ],
+ "details": {
+ "items": [
+ "Why Do Earthquakes Happen? Michigan Technological University https://www.mtu.edu › geo › learn › earthquake-cause Michigan Technological University https://www.mtu.edu › geo › learn › earthquake-cause",
+ "What is a 7 letter word for earthquake?",
+ "Preparing for an earthquake - Toowoomba Regional Council Toowoomba Regional Council https://www.tr.qld.gov.au › be-prepared-for-emergencies Toowoomba Regional Council https://www.tr.qld.gov.au › be-prepared-for-emergencies",
+ "Is a 4.8 earthquake bad?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -364,7 +330,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -421,7 +388,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 20,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[45b6e019bfa2].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[45b6e019bfa2].json
index c021249..3be7266 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[45b6e019bfa2].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[45b6e019bfa2].json
@@ -17,89 +17,62 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=7ieFabDdM-C4qtsP5diJ-AE&ved=2ahUKEwiSt-LkwMOSAxXXm2oFHXpFH9kQgK4QegYIAQgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/askscience/comments/14566ig/why_is_the_sky_blue_do_i_understand_it_correctly/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue#:~:text=Gases%20and%20particles%20in%20Earth's%20atmosphere%20scatter,a%20blue%20sky%20most%20of%20the%20time."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://kids.nationalgeographic.com/books/article/sky"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/story/why-is-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.uu.edu/dept/physics/scienceguys/2000Oct.cfm"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://plus.nasa.gov/video/space-place-in-a-snap-why-is-the-sky-blue-2/#:~:text=When%20sunlight%20enters%20Earth's%20atmosphere%2C%20it%20encounters,sky%20appears%20blue%20on%20a%20sunny%20day."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://morgridge.org/blue-sky/why-is-the-sky-blue/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://plus.nasa.gov/video/space-place-in-a-snap-why-is-the-sky-blue-2/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -117,12 +90,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "What is the true color of our sky?",
- "How to explain to a kid why the sky is blue?",
- "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "What is the true color of our sky?",
+ "How to explain to a kid why the sky is blue?",
+ "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -376,7 +352,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -393,7 +370,8 @@
"cmpt_rank": 9,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -436,7 +414,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 23,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[4c8d8d2f226c].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[4c8d8d2f226c].json
index ce4c0d2..4af1e18 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[4c8d8d2f226c].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[4c8d8d2f226c].json
@@ -154,7 +154,7 @@
{
"cite": "https://www.missionlocal.org",
"cmpt_rank": 10,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 10,
@@ -168,14 +168,17 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [
- "KICU-TV",
- "KPIX-TV",
- "KNTV",
- "KTVU",
- "KGO-TV",
- "KTTV"
- ],
+ "details": {
+ "items": [
+ "KICU-TV",
+ "KPIX-TV",
+ "KNTV",
+ "KTVU",
+ "KGO-TV",
+ "KTTV"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 11,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[53940e35cc92].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[53940e35cc92].json
index 3d16b7c..098b975 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[53940e35cc92].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[53940e35cc92].json
@@ -28,12 +28,14 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": {},
+ "details": {
+ "type": "ratings"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
"sub_rank": 0,
- "sub_type": "results_for__portland,_or",
+ "sub_type": "results_for",
"text": "Portland Lux Coffee<|>4.7<|>(270)<|> · <|>$10–20<|> · Coffee shop<|>11059 SE Division St<|> <|> <|> <|> <|> <|> <|>\"They have really good <|>coffee<|>, friendly staff, and the food is amazing!\"",
"title": "Portland Lux Coffee",
"type": "local_results",
@@ -42,12 +44,14 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": {},
+ "details": {
+ "type": "ratings"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
"sub_rank": 1,
- "sub_type": "results_for__portland,_or",
+ "sub_type": "results_for",
"text": "Less and more coffee<|>4.8<|>(426)<|> · <|>$1–10<|> · Coffee shop<|>1003 SW 5th Ave<|>Closed<|> · Opens 9 AM Sat<|> <|> <|> <|> <|> <|> <|>\"❤️ Their <|>coffee<|> choices are immensely unique and everything tastes so good.\"",
"title": "Less and more coffee",
"type": "local_results",
@@ -56,12 +60,14 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": {},
+ "details": {
+ "type": "ratings"
+ },
"error": null,
"section": "main",
"serp_rank": 3,
"sub_rank": 2,
- "sub_type": "results_for__portland,_or",
+ "sub_type": "results_for",
"text": "Above Grnd Coffee<|>4.8<|>(192)<|> · <|>$1–10<|> · Coffee shop<|>1100 SE Grand Ave<|> <|> <|> <|> <|> <|> <|>\"The staff were really friendly and the banana milk latte is to die for!\"",
"title": "Above Grnd Coffee",
"type": "local_results",
@@ -212,7 +218,8 @@
"cmpt_rank": 10,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -227,14 +234,17 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [
- "Abba Coffee Roasters",
- "Deadstock Coffee Roasters",
- "Kalesa Coffee",
- "J Vein Caffé",
- "Cathedral Coffee",
- "Cadejo Coffee"
- ],
+ "details": {
+ "items": [
+ "Abba Coffee Roasters",
+ "Deadstock Coffee Roasters",
+ "Kalesa Coffee",
+ "J Vein Caffé",
+ "Cathedral Coffee",
+ "Cadejo Coffee"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 15,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[56cbcf8cd4dc].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[56cbcf8cd4dc].json
index 3317e72..ae06a3f 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[56cbcf8cd4dc].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[56cbcf8cd4dc].json
@@ -28,7 +28,7 @@
{
"cite": "https://www.sfspca.org",
"cmpt_rank": 1,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 1,
@@ -44,13 +44,14 @@
"cmpt_rank": 2,
"details": {
"directions": "/maps/dir//Camp+TLC+Goldens,+CAMP+TLC+GOLDENS,+2234+Leo+Pl,+Aromas,+CA+95004/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808e039404ffb2c7:0xf78fd0ffe3e7783c?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://camptlcgoldens.com/"
},
"error": null,
"section": "main",
"serp_rank": 2,
"sub_rank": 0,
- "sub_type": "results_for__palo_alto,_ca_94301",
+ "sub_type": "results_for",
"text": "Camp TLC Goldens<|>5.0<|>(32)<|> · Dog breeder<|>35+ years in business · Aromas, CA · (408) 656-6402<|>Open 24 hours<|> <|> <|> <|> <|> <|> <|>\"Our puppy is so well behaved, groomed, and adjusted.\"",
"title": "Camp TLC Goldens",
"type": "local_results",
@@ -60,13 +61,14 @@
"cite": null,
"cmpt_rank": 2,
"details": {
+ "type": "ratings",
"website": "https://www.sunsetretrievers.com/"
},
"error": null,
"section": "main",
"serp_rank": 3,
"sub_rank": 1,
- "sub_type": "results_for__palo_alto,_ca_94301",
+ "sub_type": "results_for",
"text": "Sunset Retrievers<|>4.9<|>(67)<|> · Dog breeder<|>3+ years in business · (530) 748-7024<|>Open<|> · Closes 6 PM<|> <|> <|> <|> <|> <|> <|>\"She is responsive, knowledgeable and helped us get the perfect <|>pup<|>.\"",
"title": "Sunset Retrievers",
"type": "local_results",
@@ -76,13 +78,14 @@
"cite": null,
"cmpt_rank": 2,
"details": {
+ "type": "ratings",
"website": "https://www.eastbaygoldenretrievers.com/"
},
"error": null,
"section": "main",
"serp_rank": 4,
"sub_rank": 2,
- "sub_type": "results_for__palo_alto,_ca_94301",
+ "sub_type": "results_for",
"text": "East Bay Golden Retrievers<|>4.8<|>(11)<|> · Dog breeder<|>3+ years in business · (510) 815-8338<|>Open 24 hours<|> <|> <|> <|> <|> <|> <|>\"Sophie and Jonathan made the process so smooth when getting our puppy!\"",
"title": "East Bay Golden Retrievers",
"type": "local_results",
@@ -93,13 +96,14 @@
"cmpt_rank": 2,
"details": {
"directions": "/aclk?sa=L&ai=DChsSEwiKnf3Ah8aSAxUNWpEFHZ9zBJ4YACICCAEQBRoCbHI&co=1&ase=2&gclid=EAIaIQobChMIip39wIfGkgMVDVqRBR2fcwSeEBAYASACEgIQkfD_BwE&cid=CAASugHkaKbCC_7FvsGkN4CC6jOFdZnVj4e22hHkmQt7ALvmE2fj_VQjrXw1woH3BOT7kvWiZA2jxETL2v58ZxOxE1QfOEeO5W3DIJCDGnumZMmSLEaUypyQ8TPlMtsXtPsd0v8kILLJttd9iZ5B2dMg9W2VECSbitoTqilW2AhlP2OwLVmzB9EIIxmLKZ-bTlI2Z01Sr9JwUGcmuBm9UZ8hGOYoUKCwGeQM7UE2GjeHBrpzs8OThKO-tzfqxzY&cce=2&category=acrcp_v1_32&sig=AOD64_3Ak2FUEJSCDCYZl-A6LdM_8msBbg&ctype=50&q=&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQhawBegQINBAS&adurl=",
+ "type": "ratings",
"website": "/aclk?sa=L&ai=DChsSEwiKnf3Ah8aSAxUNWpEFHZ9zBJ4YACICCAEQABoCbHI&co=1&ase=2&gclid=EAIaIQobChMIip39wIfGkgMVDVqRBR2fcwSeEBAYASAAEgJy-fD_BwE&cid=CAASugHkaKbCC_7FvsGkN4CC6jOFdZnVj4e22hHkmQt7ALvmE2fj_VQjrXw1woH3BOT7kvWiZA2jxETL2v58ZxOxE1QfOEeO5W3DIJCDGnumZMmSLEaUypyQ8TPlMtsXtPsd0v8kILLJttd9iZ5B2dMg9W2VECSbitoTqilW2AhlP2OwLVmzB9EIIxmLKZ-bTlI2Z01Sr9JwUGcmuBm9UZ8hGOYoUKCwGeQM7UE2GjeHBrpzs8OThKO-tzfqxzY&cce=2&category=acrcp_v1_32&sig=AOD64_253-gfE7QqmGK-tPVNzoOs8lMGIA&q=&nis=4&ctype=99&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQhKwBegQINBAQ&adurl="
},
"error": null,
"section": "main",
"serp_rank": 5,
"sub_rank": 3,
- "sub_type": "results_for__palo_alto,_ca_94301",
+ "sub_type": "results_for",
"text": "Sponsored<|>Humane Society Silicon Valley<|> <|>My Ad Center<|>4.7<|>(1.7K)<|> · Non-profit organization<|>10+ years in business · 901 Ames Avenue · (408) 262-2133<|>Open<|> · Closes 5 PM",
"title": "Humane Society Silicon Valley My Ad Center",
"type": "local_results",
@@ -136,12 +140,15 @@
{
"cite": null,
"cmpt_rank": 5,
- "details": [
- "What is the average cost of a golden retriever puppy?",
- "How much would you pay for a golden retriever puppy?",
- "What is the hardest part of owning a golden retriever?",
- "Is $2500 a lot for a golden retriever?"
- ],
+ "details": {
+ "items": [
+ "What is the average cost of a golden retriever puppy?",
+ "How much would you pay for a golden retriever puppy?",
+ "What is the hardest part of owning a golden retriever?",
+ "Is $2500 a lot for a golden retriever?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 8,
@@ -173,41 +180,30 @@
"heading": "Find related products & services",
"img_url": null,
"text": "Find related products & services|About the source|About the source|About the source|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These searches help you find relevant offers from advertisers",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Golden Retriever puppies |adoption near me",
- "title": "",
"url": "http://www.google.com/search?q=Golden+Retriever+puppies+adoption+near+me&mrq=1&mrqri=0&mrqei=hn6GadSgFtH4wPAPt6jmiAQ&sa=X&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQyO0OKAB6BAhDEAU"
},
{
- "misc": {},
"text": "Golden Retriever puppies |free",
- "title": "",
"url": "http://www.google.com/search?q=Golden+Retriever+puppies+free&mrq=1&mrqri=1&mrqei=hn6GadSgFtH4wPAPt6jmiAQ&sa=X&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQyO0OKAF6BAhDEAY"
},
{
- "misc": {},
"text": "Golden Retriever puppies |shelter",
- "title": "",
"url": "http://www.google.com/search?q=Golden+Retriever+puppies+shelter&mrq=1&mrqri=2&mrqei=hn6GadSgFtH4wPAPt6jmiAQ&sa=X&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQyO0OKAJ6BAhDEAc"
},
{
- "misc": {},
"text": "Golden Retriever puppies |sacramento under $500",
- "title": "",
"url": "http://www.google.com/search?q=Golden+Retriever+puppies+sacramento+under+$500&mrq=1&mrqri=3&mrqei=hn6GadSgFtH4wPAPt6jmiAQ&sa=X&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQyO0OKAN6BAhDEAg"
},
{
- "misc": {},
"text": "Golden Retriever puppies |for sale under $300",
- "title": "",
"url": "http://www.google.com/search?q=Golden+Retriever+puppies+for+sale+under+$300&mrq=1&mrqri=4&mrqei=hn6GadSgFtH4wPAPt6jmiAQ&sa=X&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQyO0OKAR6BAhDEAk"
},
{
- "misc": {},
"text": "English| Golden Retriever puppies |near me",
- "title": "",
"url": "http://www.google.com/search?q=English+Golden+Retriever+puppies+near+me&mrq=1&mrqri=5&mrqei=hn6GadSgFtH4wPAPt6jmiAQ&sa=X&ved=2ahUKEwjU-_bAh8aSAxVRPBAIHTeUGUEQyO0OKAV6BAhDEAo"
}
]
@@ -309,32 +305,27 @@
{
"cite": "https://www.puppyspot.com",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Litters Ready To Go Home Soon",
- "url": "https://www.puppyspot.com/puppies-for-sale/breed/golden-retriever%7Bignore%7D?cq_src=google_ads&cq_cmp=%7Bcampaignid%7D&cq_term=%7Bkeyword%7D&cq_med=%7Badtype%7D&cq_plac=%7Bplacement%7D&cq_net=%7Bnetwork%7D&cq_plt=gp"
- },
- {
- "misc": {},
- "text": "",
- "title": "AKC Approved & Authorized",
- "url": "https://www.puppyspot.com/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Search Puppies",
- "url": "https://www.puppyspot.com/puppies-for-sale/"
- },
- {
- "misc": {},
- "text": "",
- "title": "About Us",
- "url": "https://www.puppyspot.com/about-us"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Litters Ready To Go Home Soon",
+ "url": "https://www.puppyspot.com/puppies-for-sale/breed/golden-retriever%7Bignore%7D?cq_src=google_ads&cq_cmp=%7Bcampaignid%7D&cq_term=%7Bkeyword%7D&cq_med=%7Badtype%7D&cq_plac=%7Bplacement%7D&cq_net=%7Bnetwork%7D&cq_plt=gp"
+ },
+ {
+ "title": "AKC Approved & Authorized",
+ "url": "https://www.puppyspot.com/"
+ },
+ {
+ "title": "Search Puppies",
+ "url": "https://www.puppyspot.com/puppies-for-sale/"
+ },
+ {
+ "title": "About Us",
+ "url": "https://www.puppyspot.com/about-us"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 17,
@@ -348,32 +339,27 @@
{
"cite": "https://www.rrdog.org",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Puppies Available For Adoption",
- "url": "/aclk?sa=L&ai=DChsSEwiKnf3Ah8aSAxUNWpEFHZ9zBJ4YACICCAIQCBoCbHI&co=1&ase=2&gclid=EAIaIQobChMIip39wIfGkgMVDVqRBR2fcwSeEAMYAiAEEgKBQfD_BwE&cid=CAASugHkaKbCC_7FvsGkN4CC6jOFdZnVj4e22hHkmQt7ALvmE2fj_VQjrXw1woH3BOT7kvWiZA2jxETL2v58ZxOxE1QfOEeO5W3DIJCDGnumZMmSLEaUypyQ8TPlMtsXtPsd0v8kILLJttd9iZ5B2dMg9W2VECSbitoTqilW2AhlP2OwLVmzB9EIIxmLKZ-bTlI2Z01Sr9JwUGcmuBm9UZ8hGOYoUKCwGeQM7UE2GjeHBrpzs8OThKO-tzfqxzY&cce=2&category=acrcp_v1_32&sig=AOD64_2TjLcqkMLEP_05yBeuTQKYXYOr6A&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Dogs Available for Adoption",
- "url": "https://rrdog.org/adoption/adults/"
- },
- {
- "misc": {},
- "text": "",
- "title": "See List Of Available Puppies",
- "url": "https://rrdog.org/adoption/availablepuppies/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Foster",
- "url": "https://rrdog.org/foster-a-dog/"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Puppies Available For Adoption",
+ "url": "/aclk?sa=L&ai=DChsSEwiKnf3Ah8aSAxUNWpEFHZ9zBJ4YACICCAIQCBoCbHI&co=1&ase=2&gclid=EAIaIQobChMIip39wIfGkgMVDVqRBR2fcwSeEAMYAiAEEgKBQfD_BwE&cid=CAASugHkaKbCC_7FvsGkN4CC6jOFdZnVj4e22hHkmQt7ALvmE2fj_VQjrXw1woH3BOT7kvWiZA2jxETL2v58ZxOxE1QfOEeO5W3DIJCDGnumZMmSLEaUypyQ8TPlMtsXtPsd0v8kILLJttd9iZ5B2dMg9W2VECSbitoTqilW2AhlP2OwLVmzB9EIIxmLKZ-bTlI2Z01Sr9JwUGcmuBm9UZ8hGOYoUKCwGeQM7UE2GjeHBrpzs8OThKO-tzfqxzY&cce=2&category=acrcp_v1_32&sig=AOD64_2TjLcqkMLEP_05yBeuTQKYXYOr6A&adurl=&q="
+ },
+ {
+ "title": "Dogs Available for Adoption",
+ "url": "https://rrdog.org/adoption/adults/"
+ },
+ {
+ "title": "See List Of Available Puppies",
+ "url": "https://rrdog.org/adoption/availablepuppies/"
+ },
+ {
+ "title": "Foster",
+ "url": "https://rrdog.org/foster-a-dog/"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 18,
@@ -387,7 +373,7 @@
{
"cite": null,
"cmpt_rank": 15,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 19,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[56f2eab63e9d].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[56f2eab63e9d].json
index 68ff7b7..af36716 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[56f2eab63e9d].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[56f2eab63e9d].json
@@ -31,11 +31,10 @@
"details": {
"heading": "6719°F°F°C °C",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Google Weather",
- "title": "",
"url": "https://support.google.com/websearch/answer/13687874"
}
]
@@ -193,7 +192,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 12,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[5898b04fb534].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[5898b04fb534].json
index d9df038..eed9666 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[5898b04fb534].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[5898b04fb534].json
@@ -16,11 +16,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "345",
"price": "$96",
"rating": "3.2",
- "reviews": "(345)",
"source": "Expedia.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -37,11 +38,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "2.1K",
"price": "$89",
"rating": "3.8",
- "reviews": "(2.1K)",
"source": "Booking.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -58,11 +60,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "1.9K",
"price": "$110",
"rating": "4.2",
- "reviews": "(1.9K)",
"source": "Hotels.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -79,11 +82,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "7.5K",
"price": "$153",
"rating": "4.7",
- "reviews": "(7.5K)",
"source": "RIU Plaza",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -100,11 +104,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "3.6K",
"price": "$177",
"rating": "3.9",
- "reviews": "(3.6K)",
"source": "The Lexington Hotel, Autograph Collection",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -121,11 +126,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "1.5K",
"price": "$84",
"rating": "3.8",
- "reviews": "(1.5K)",
"source": "Aloft New York Brooklyn",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -142,11 +148,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Free cancellation",
+ "n_reviews": "2.9K",
"price": "$123",
"rating": "3.8",
- "reviews": "(2.9K)",
"source": "Hyatt Place New York City / Times Square",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -163,11 +170,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "2.1K",
"price": "$147",
"rating": "3.5",
- "reviews": "(2.1K)",
"source": "Holiday Inn Express New York City Times Square by IHG",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -184,11 +192,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "3.4K",
"price": "$282",
"rating": "4.4",
- "reviews": "(3.4K)",
"source": "Hyatt Centric Times Square New York",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -205,11 +214,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "2K",
"price": "$164",
"rating": "4.0",
- "reviews": "(2K)",
"source": "Dream Midtown, by Hyatt",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -226,11 +236,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "6.5K",
"price": "$93",
"rating": "3.5",
- "reviews": "(6.5K)",
"source": "Booking.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -247,11 +258,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "1.9K",
"price": "$93",
"rating": "4.2",
- "reviews": "(1.9K)",
"source": "Expedia.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -268,11 +280,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "4.9K",
"price": "$142",
"rating": "4.1",
- "reviews": "(4.9K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -289,11 +302,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "3.7K",
"price": "$103",
"rating": "3.6",
- "reviews": "(3.7K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -310,11 +324,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "1.2K",
"price": "$101",
"rating": "4.0",
- "reviews": "(1.2K)",
"source": "Booking.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -331,11 +346,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "1.8K",
"price": "$105",
"rating": "4.1",
- "reviews": "(1.8K)",
"source": "Expedia.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -352,11 +368,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Free cancellation",
+ "n_reviews": "7.5K",
"price": "$129",
"rating": "3.9",
- "reviews": "(7.5K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -373,11 +390,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "12K",
"price": "$119",
"rating": "3.9",
- "reviews": "(12K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -394,11 +412,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Free cancellation",
+ "n_reviews": "934",
"price": "$106",
"rating": "3.5",
- "reviews": "(934)",
"source": "Expedia.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -415,11 +434,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Free cancellation",
+ "n_reviews": "9.8K",
"price": "$137",
"rating": "3.0",
- "reviews": "(9.8K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -436,11 +456,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "8.4K",
"price": "$182",
"rating": "4.0",
- "reviews": "(8.4K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -457,11 +478,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Bar",
+ "n_reviews": "936",
"price": "$121",
"rating": "4.7",
- "reviews": "(936)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -478,11 +500,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Free cancellation",
+ "n_reviews": "7.3K",
"price": "$135",
"rating": "3.4",
- "reviews": "(7.3K)",
"source": "Booking.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -499,11 +522,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "2.7K",
"price": "$152",
"rating": "4.6",
- "reviews": "(2.7K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -520,11 +544,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "3.9K",
"price": "$100",
"rating": "4.1",
- "reviews": "(3.9K)",
"source": "Booking.com",
- "stars": "3-star hotel"
+ "stars": "3-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -541,11 +566,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "528",
"price": "$118",
"rating": "3.7",
- "reviews": "(528)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -562,11 +588,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Fitness center",
+ "n_reviews": "289",
"price": "$103",
"rating": "4.2",
- "reviews": "(289)",
"source": "Expedia.com",
- "stars": "Pet-friendly"
+ "stars": "Pet-friendly",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -583,11 +610,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "2.8K",
"price": "$93",
"rating": "3.7",
- "reviews": "(2.8K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -604,11 +632,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Restaurant",
+ "n_reviews": "10K",
"price": "$151",
"rating": "4.5",
- "reviews": "(10K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -625,11 +654,12 @@
"cmpt_rank": 0,
"details": {
"amenity": "Pet-friendly",
+ "n_reviews": "12K",
"price": "$186",
"rating": "3.8",
- "reviews": "(12K)",
"source": "Expedia.com",
- "stars": "4-star hotel"
+ "stars": "4-star hotel",
+ "type": "ratings"
},
"error": null,
"section": "main",
@@ -644,38 +674,31 @@
{
"cite": "https://www.kayak.com",
"cmpt_rank": 1,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Hotels from $124/night",
- "url": "/aclk?sa=L&pf=1&ai=DChsSEwiWi5_yicaSAxV-uykDHe9sE0YYACICCAEQRBoCdGI&co=1&ase=2&gclid=EAIaIQobChMIlouf8onGkgMVfrspAx3vbBNGEAAYASAGEgL9BPD_BwE&cid=CAASugHkaMFohy-mT2lmuGvId03LA7AIRCh_b97fdK_aD4uBjb5P7ztY7b_hKLpyLP8xsp7bv1NqWw9-P8FCMQQsw0NyVCYVNFVt8UoDBbbxpN2by_mEBACDcGYUcrIyLeCCw5PRjlDRwAaRC_ryXzN5wnbSrTMSTEpDORNRjVhrpDuCjV3crjEvhxtE4Zecenso4_zM8rAvXsW_q_LNQ_daZANas_UU5UGIYg618_nyrA0sRXhuU5Qjlxg8VgA&cce=2&category=acrcp_v1_32&sig=AOD64_0My-ImSazb-_qglRkyp8UQ9xxVVA&adurl=https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html%3Faid%3D134292535167%26tid%3Dkwd-145794293%26locp%3D9031967%26loci%3D%26mt%3Db%26n%3Dg%26d%3Dc%26cid%3D763710666141%26pos%3D%26gad_source%3D1%26gad_campaignid%3D15484165362%26gbraid%3D0AAAAADroXTA62kPDsbqehCvq8bRKxn01d%26gclid%3DEAIaIQobChMIlouf8onGkgMVfrspAx3vbBNGEAAYASAGEgL9BPD_BwE&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "4-star hotels",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_general/any/en.html?tags=f:393&"
- },
- {
- "misc": {},
- "text": "",
- "title": "Low prices on great hotels",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?"
- },
- {
- "misc": {},
- "text": "",
- "title": "Luxury Hotels",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:1455&"
- },
- {
- "misc": {},
- "text": "",
- "title": "All-inclusive",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:1459&"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Hotels from $124/night",
+ "url": "/aclk?sa=L&pf=1&ai=DChsSEwiWi5_yicaSAxV-uykDHe9sE0YYACICCAEQRBoCdGI&co=1&ase=2&gclid=EAIaIQobChMIlouf8onGkgMVfrspAx3vbBNGEAAYASAGEgL9BPD_BwE&cid=CAASugHkaMFohy-mT2lmuGvId03LA7AIRCh_b97fdK_aD4uBjb5P7ztY7b_hKLpyLP8xsp7bv1NqWw9-P8FCMQQsw0NyVCYVNFVt8UoDBbbxpN2by_mEBACDcGYUcrIyLeCCw5PRjlDRwAaRC_ryXzN5wnbSrTMSTEpDORNRjVhrpDuCjV3crjEvhxtE4Zecenso4_zM8rAvXsW_q_LNQ_daZANas_UU5UGIYg618_nyrA0sRXhuU5Qjlxg8VgA&cce=2&category=acrcp_v1_32&sig=AOD64_0My-ImSazb-_qglRkyp8UQ9xxVVA&adurl=https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html%3Faid%3D134292535167%26tid%3Dkwd-145794293%26locp%3D9031967%26loci%3D%26mt%3Db%26n%3Dg%26d%3Dc%26cid%3D763710666141%26pos%3D%26gad_source%3D1%26gad_campaignid%3D15484165362%26gbraid%3D0AAAAADroXTA62kPDsbqehCvq8bRKxn01d%26gclid%3DEAIaIQobChMIlouf8onGkgMVfrspAx3vbBNGEAAYASAGEgL9BPD_BwE&q="
+ },
+ {
+ "title": "4-star hotels",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_general/any/en.html?tags=f:393&"
+ },
+ {
+ "title": "Low prices on great hotels",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?"
+ },
+ {
+ "title": "Luxury Hotels",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:1455&"
+ },
+ {
+ "title": "All-inclusive",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:1459&"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 30,
@@ -689,7 +712,7 @@
{
"cite": "https://www.expedia.com/",
"cmpt_rank": 1,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 31,
@@ -703,7 +726,7 @@
{
"cite": "http://www.booking.com › manhattan › hotels",
"cmpt_rank": 1,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 32,
@@ -814,12 +837,15 @@
{
"cite": null,
"cmpt_rank": 6,
- "details": [
- "10 Best Neighborhoods in NYC: Where to Stay (A Local's Guide) Destination Dreamer Diaries https://www.destinationdreamerdiaries.com › blog › 10-b... Destination Dreamer Diaries https://www.destinationdreamerdiaries.com › blog › 10-b...",
- "Is it better to stay in Times Square or Midtown?",
- "What hotels do the Kardashians stay at in NYC?",
- "What is the number one hotel in NYC?"
- ],
+ "details": {
+ "items": [
+ "10 Best Neighborhoods in NYC: Where to Stay (A Local's Guide) Destination Dreamer Diaries https://www.destinationdreamerdiaries.com › blog › 10-b... Destination Dreamer Diaries https://www.destinationdreamerdiaries.com › blog › 10-b...",
+ "Is it better to stay in Times Square or Midtown?",
+ "What hotels do the Kardashians stay at in NYC?",
+ "What is the number one hotel in NYC?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 39,
@@ -837,41 +863,30 @@
"heading": "Find related products & services",
"img_url": null,
"text": "Find related products & services|About the source|About the source|About the source|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These searches help you find relevant offers from advertisers",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Hotels in Manhattan |Downtown",
- "title": "",
"url": "http://www.google.com/search?q=Hotels+in+Manhattan+Downtown&mrq=1&mrqri=0&mrqei=BoGGadPHIoqo2roPqNvyoQI&sa=X&ved=2ahUKEwjT4pnyicaSAxUKlFYBHaitPCQQyO0OKAB6BAg1EAU"
},
{
- "misc": {},
"text": "Hotels in manhattan |with view",
- "title": "",
"url": "http://www.google.com/search?q=Hotels+in+manhattan+with+view&mrq=1&mrqri=1&mrqei=BoGGadPHIoqo2roPqNvyoQI&sa=X&ved=2ahUKEwjT4pnyicaSAxUKlFYBHaitPCQQyO0OKAF6BAg1EAY"
},
{
- "misc": {},
"text": "Hotels in manhattan |near grand central station",
- "title": "",
"url": "http://www.google.com/search?q=Hotels+in+manhattan+near+grand+central+station&mrq=1&mrqri=2&mrqei=BoGGadPHIoqo2roPqNvyoQI&sa=X&ved=2ahUKEwjT4pnyicaSAxUKlFYBHaitPCQQyO0OKAJ6BAg1EAc"
},
{
- "misc": {},
"text": "Hotels in Manhattan |Upper West Side",
- "title": "",
"url": "http://www.google.com/search?q=Hotels+in+Manhattan+Upper+West+Side&mrq=1&mrqri=3&mrqei=BoGGadPHIoqo2roPqNvyoQI&sa=X&ved=2ahUKEwjT4pnyicaSAxUKlFYBHaitPCQQyO0OKAN6BAg1EAg"
},
{
- "misc": {},
"text": "Hotels in Manhattan |ny near Times Square",
- "title": "",
"url": "http://www.google.com/search?q=Hotels+in+Manhattan+ny+near+Times+Square&mrq=1&mrqri=4&mrqei=BoGGadPHIoqo2roPqNvyoQI&sa=X&ved=2ahUKEwjT4pnyicaSAxUKlFYBHaitPCQQyO0OKAR6BAg1EAk"
},
{
- "misc": {},
"text": "Hotels in Manhattan |NY cheap",
- "title": "",
"url": "http://www.google.com/search?q=Hotels+in+Manhattan+NY+cheap&mrq=1&mrqri=5&mrqei=BoGGadPHIoqo2roPqNvyoQI&sa=X&ved=2ahUKEwjT4pnyicaSAxUKlFYBHaitPCQQyO0OKAV6BAg1EAo"
}
]
@@ -1239,38 +1254,31 @@
{
"cite": "https://www.kayak.com",
"cmpt_rank": 16,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "4-star hotels",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:393&"
- },
- {
- "misc": {},
- "text": "",
- "title": "Low prices on great hotels",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?"
- },
- {
- "misc": {},
- "text": "",
- "title": "Hotel deals in New York",
- "url": "/aclk?sa=L&ai=DChsSEwiWi5_yicaSAxV-uykDHe9sE0YYACICCAIQDRoCdGI&co=1&ase=2&gclid=EAIaIQobChMIlouf8onGkgMVfrspAx3vbBNGEAMYASAGEgIB8fD_BwE&cid=CAASugHkaMFohy-mT2lmuGvId03LA7AIRCh_b97fdK_aD4uBjb5P7ztY7b_hKLpyLP8xsp7bv1NqWw9-P8FCMQQsw0NyVCYVNFVt8UoDBbbxpN2by_mEBACDcGYUcrIyLeCCw5PRjlDRwAaRC_ryXzN5wnbSrTMSTEpDORNRjVhrpDuCjV3crjEvhxtE4Zecenso4_zM8rAvXsW_q_LNQ_daZANas_UU5UGIYg618_nyrA0sRXhuU5Qjlxg8VgA&cce=2&category=acrcp_v1_32&sig=AOD64_2r-8XkMa8zCw0x7msbEBZPE57ZaA&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Luxury Hotels",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:1455&"
- },
- {
- "misc": {},
- "text": "",
- "title": "All-inclusive",
- "url": "https://www.kayak.com/semi/adwordssearch/hotel_general/any/en.html?tags=f:1459&"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "4-star hotels",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:393&"
+ },
+ {
+ "title": "Low prices on great hotels",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?"
+ },
+ {
+ "title": "Hotel deals in New York",
+ "url": "/aclk?sa=L&ai=DChsSEwiWi5_yicaSAxV-uykDHe9sE0YYACICCAIQDRoCdGI&co=1&ase=2&gclid=EAIaIQobChMIlouf8onGkgMVfrspAx3vbBNGEAMYASAGEgIB8fD_BwE&cid=CAASugHkaMFohy-mT2lmuGvId03LA7AIRCh_b97fdK_aD4uBjb5P7ztY7b_hKLpyLP8xsp7bv1NqWw9-P8FCMQQsw0NyVCYVNFVt8UoDBbbxpN2by_mEBACDcGYUcrIyLeCCw5PRjlDRwAaRC_ryXzN5wnbSrTMSTEpDORNRjVhrpDuCjV3crjEvhxtE4Zecenso4_zM8rAvXsW_q_LNQ_daZANas_UU5UGIYg618_nyrA0sRXhuU5Qjlxg8VgA&cce=2&category=acrcp_v1_32&sig=AOD64_2r-8XkMa8zCw0x7msbEBZPE57ZaA&adurl=&q="
+ },
+ {
+ "title": "Luxury Hotels",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_destination/15830/en.html?tags=f:1455&"
+ },
+ {
+ "title": "All-inclusive",
+ "url": "https://www.kayak.com/semi/adwordssearch/hotel_general/any/en.html?tags=f:1459&"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 64,
@@ -1284,32 +1292,27 @@
{
"cite": "http://www.booking.com › manhattan › hotels",
"cmpt_rank": 16,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Best Hotels In Manhattan",
- "url": "http://www.booking.com/district/us/new-york/manhattan.en.html?aid=336408%7Bignore%7D;label=district-manhattan"
- },
- {
- "misc": {},
- "text": "",
- "title": "Hotels at Great Prices",
- "url": "https://www.booking.com/go.html?slc=gp;aid=336408;label="
- },
- {
- "misc": {},
- "text": "",
- "title": "Book Now",
- "url": "https://www.booking.com/go.html?slc=bn;aid=336408;label="
- },
- {
- "misc": {},
- "text": "",
- "title": "Top Reviewed Hotels",
- "url": "https://www.booking.com/go.html?slc=r1;aid=336408;label="
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Best Hotels In Manhattan",
+ "url": "http://www.booking.com/district/us/new-york/manhattan.en.html?aid=336408%7Bignore%7D;label=district-manhattan"
+ },
+ {
+ "title": "Hotels at Great Prices",
+ "url": "https://www.booking.com/go.html?slc=gp;aid=336408;label="
+ },
+ {
+ "title": "Book Now",
+ "url": "https://www.booking.com/go.html?slc=bn;aid=336408;label="
+ },
+ {
+ "title": "Top Reviewed Hotels",
+ "url": "https://www.booking.com/go.html?slc=r1;aid=336408;label="
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 65,
@@ -1323,14 +1326,17 @@
{
"cite": null,
"cmpt_rank": 17,
- "details": [
- "Motto by Hilton New York City Chelsea",
- "PUBLIC Hotel",
- "The Evelyn",
- "The Bryant Park Hotel",
- "Hilton Garden Inn NYC Financial Center/Manhattan Downtown",
- "Pod Times Square"
- ],
+ "details": {
+ "items": [
+ "Motto by Hilton New York City Chelsea",
+ "PUBLIC Hotel",
+ "The Evelyn",
+ "The Bryant Park Hotel",
+ "Hilton Garden Inn NYC Financial Center/Manhattan Downtown",
+ "Pod Times Square"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 66,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[6978d0cd767d].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[6978d0cd767d].json
index 65951e8..7baf271 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[6978d0cd767d].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[6978d0cd767d].json
@@ -17,7 +17,8 @@
"details": {
"heading": null,
"img_url": null,
- "text": null
+ "text": null,
+ "type": "panel"
},
"error": null,
"section": "main",
@@ -46,12 +47,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "5 Computer Science Languages to Learn Southern New Hampshire University https://www.snhu.edu › about-us › newsroom › stem › 5... Southern New Hampshire University https://www.snhu.edu › about-us › newsroom › stem › 5...",
- "C++ vs Python - What You Need to Know | KO2 Recruitment KO2 Recruitment https://www.ko2.co.uk › c-plus-plus-vs-python KO2 Recruitment https://www.ko2.co.uk › c-plus-plus-vs-python",
- "Was Elon Musk a coder?",
- "Is 3 months enough for coding?"
- ],
+ "details": {
+ "items": [
+ "5 Computer Science Languages to Learn Southern New Hampshire University https://www.snhu.edu › about-us › newsroom › stem › 5... Southern New Hampshire University https://www.snhu.edu › about-us › newsroom › stem › 5...",
+ "C++ vs Python - What You Need to Know | KO2 Recruitment KO2 Recruitment https://www.ko2.co.uk › c-plus-plus-vs-python KO2 Recruitment https://www.ko2.co.uk › c-plus-plus-vs-python",
+ "Was Elon Musk a coder?",
+ "Is 3 months enough for coding?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -277,7 +281,8 @@
"cmpt_rank": 7,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -336,7 +341,8 @@
"cmpt_rank": 11,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -351,38 +357,31 @@
{
"cite": "https://www.techspaceseducation.org",
"cmpt_rank": 12,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Top Coding Languages in 2026",
- "url": "https://techspaceseducation.org/articleview?articlename=best-programming-languages-2026"
- },
- {
- "misc": {},
- "text": "",
- "title": "Python vs. C++",
- "url": "/aclk?sa=L&ai=DChsSEwjYkba4isaSAxV_hbkFHdf-PDkYACICCAIQBRoCdG0&co=1&ase=2&gclid=EAIaIQobChMI2JG2uIrGkgMVf4W5BR3X_jw5EAMYASAFEgJEwfD_BwE&cid=CAASugHkaNg0uVD75KkcRC-VlkRZnn1HfX6JYiV6_1tnH7ejSCmVXKYfdAcGD7J5WHA5hMV1rt9suvllHHn-69h9HanQhaxy9WDlV5v9ibI6lv3tUIBwCjlkioYYc4coSAgvur38XukJnd3JNMtPDTuyyKe72IUPR9Jmr_CcwXff3J3wLmZOI3uYj-gw-0-_jJbYwev2W8MTYenSBa4DL2Ik2DJaSwwiwUPaniaevKc3y_NVChhDHU9lVM-1p2E&cce=2&category=acrcp_v1_32&sig=AOD64_3XeoWZUPkfKR0NBbHFhrKVVbE7Aw&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Courses",
- "url": "https://techspaceseducation.org/courses"
- },
- {
- "misc": {},
- "text": "",
- "title": "About Us",
- "url": "https://techspaceseducation.org/about"
- },
- {
- "misc": {},
- "text": "",
- "title": "Resources",
- "url": "https://techspaceseducation.org/resources"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Top Coding Languages in 2026",
+ "url": "https://techspaceseducation.org/articleview?articlename=best-programming-languages-2026"
+ },
+ {
+ "title": "Python vs. C++",
+ "url": "/aclk?sa=L&ai=DChsSEwjYkba4isaSAxV_hbkFHdf-PDkYACICCAIQBRoCdG0&co=1&ase=2&gclid=EAIaIQobChMI2JG2uIrGkgMVf4W5BR3X_jw5EAMYASAFEgJEwfD_BwE&cid=CAASugHkaNg0uVD75KkcRC-VlkRZnn1HfX6JYiV6_1tnH7ejSCmVXKYfdAcGD7J5WHA5hMV1rt9suvllHHn-69h9HanQhaxy9WDlV5v9ibI6lv3tUIBwCjlkioYYc4coSAgvur38XukJnd3JNMtPDTuyyKe72IUPR9Jmr_CcwXff3J3wLmZOI3uYj-gw-0-_jJbYwev2W8MTYenSBa4DL2Ik2DJaSwwiwUPaniaevKc3y_NVChhDHU9lVM-1p2E&cce=2&category=acrcp_v1_32&sig=AOD64_3XeoWZUPkfKR0NBbHFhrKVVbE7Aw&adurl=&q="
+ },
+ {
+ "title": "Courses",
+ "url": "https://techspaceseducation.org/courses"
+ },
+ {
+ "title": "About Us",
+ "url": "https://techspaceseducation.org/about"
+ },
+ {
+ "title": "Resources",
+ "url": "https://techspaceseducation.org/resources"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 23,
@@ -396,14 +395,17 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [
- "Programming language",
- "Python",
- "SQL",
- "High-level programming language",
- "Computer programming",
- "Operating system"
- ],
+ "details": {
+ "items": [
+ "Programming language",
+ "Python",
+ "SQL",
+ "High-level programming language",
+ "Computer programming",
+ "Operating system"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 24,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[6aa70651b0cd].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[6aa70651b0cd].json
index d520c4a..4b66c0e 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[6aa70651b0cd].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[6aa70651b0cd].json
@@ -17,77 +17,54 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=ZReFaf6SHo2uqtsP2fD5kAc&mstk=AUtExfD1Uiel88dwC18JdCTfYdXjwoPty236E8Y5qjP7fqrWL-l9QFxc-_4pZv4nzYZy5N6dxpmDFem9bzIIINb-w2zN-HicZPdy1gx5-eGstkHJ4yhwO8WxkTJXFBAK5PNPtg8&csui=3&ved=2ahUKEwjY746CscOSAxW7m2oFHQ88KZ0QgK4QegYIAQgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/story/why-is-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.instagram.com/reel/DIZJjPvOtIN/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.rmg.co.uk/stories/space-astronomy/why-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wsav.com/weather-news/why-the-sky-looks-bluer-in-fall-and-winter/#:~:text=In%20summer%2C%20the%20sun%20sits%20higher%20in,intensifying%20the%20blue%20appearance%20of%20the%20sky."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wsav.com/weather-news/why-the-sky-looks-bluer-in-fall-and-winter/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -105,12 +82,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "What is the true color of our sky?",
- "How to explain to a kid why the sky is blue?",
- "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "What is the true color of our sky?",
+ "How to explain to a kid why the sky is blue?",
+ "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -210,7 +190,8 @@
"cmpt_rank": 6,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -561,7 +542,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 33,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[6e206db14899].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[6e206db14899].json
index 47141f5..0fddb3c 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[6e206db14899].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[6e206db14899].json
@@ -17,155 +17,106 @@
"details": {
"heading": "Interactive Online Tutorials (No Installation Required) ",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "W3Schools Python Tutorial",
- "title": "",
"url": "https://www.w3schools.com/python/"
},
{
- "misc": {},
"text": "LearnPython.org",
- "title": "",
"url": "https://www.learnpython.org/"
},
{
- "misc": {},
"text": "futurecoder",
- "title": "",
"url": "https://futurecoder.io/"
},
{
- "misc": {},
"text": "Official Python Beginner's Guide",
- "title": "",
"url": "https://wiki.python.org/moin/BeginnersGuide"
},
{
- "misc": {},
"text": "python.org downloads page",
- "title": "",
"url": "https://www.python.org/downloads/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.w3schools.com/python/#:~:text=Module%20Reference,What%20is%20this?"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.forbes.com/advisor/education/it-and-tech/how-to-learn-python/#:~:text=Users%20can%20find%20stand%2Dalone,lessons%20at%20your%20own%20speed."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/learnpython/comments/10k55u9/best_resources_to_study_python/#:~:text=Best%20resources%20to%20study%20Python,hattorihanzo14"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://wiki.python.org/moin/BeginnersGuide#:~:text=Getting%20Python,list%20of%20Non%2DEnglish%20resources."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/learnpython/comments/1ilgq0w/best_beginners_course_for_learning_python_in_2025/#:~:text=Comments%20Section,OP%20%E2%80%A2%201y%20ago"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/learnpython/comments/1e4pvji/interactive_free_websites_to_learn_python/#:~:text=Freecodecamp%20have%20a%20huge%20youtube,Python%20syntax%20one%20(W3schools?)"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/learnpython/comments/10rlgnu/recommended_free_online_python_courses/#:~:text=Exercises:,Codingame%2C%20Codecombat%20%E2%80%94%20gaming%20based%20challenges"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.learnpython.org/#:~:text=Learn%20Python%20%2D%20Free%20Interactive%20Python,Printing%20on%20screen"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://futurecoder.io/#:~:text=About.%20futurecoder%20is%20a%20free%20and%20open%2Dsource,beginners%20to%20teach%20themselves%20programming%20in%20Python."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/learnpython/comments/18l6y75/how_to_start_python_for_a_complete_noob/#:~:text=The%20key%20is%20to%20stay,online%20or%20create%20your%20own."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.python.org/about/gettingstarted/#:~:text=Installing%20Python%20is%20generally%20easy,dry)%20explanation%20of%20Python's%20syntax."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.coursereport.com/blog/the-best-python-tutorial-for-beginners-top-10-list"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.ko2.co.uk/c-plus-plus-vs-python/#:~:text=Python's%20syntax%20is%20a%20lot,to%20get%20to%20grips%20with."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://wiki.python.org/moin/BeginnersGuide/NonProgrammers#:~:text=Tutorials%20and%20Websites&text=Afternerd%2C%20by%20Karim%20Elghamrawy%2C%20is,/server%20introduction%2C%20with%20videos.&text=Letsfindcourse%20%2D%20Python:%20Best%20Python%20tutorials%20and%20courses%20recommended%20by%20experts.&text=Learn%20Python%20An%20Introductory%20yet,Questions%20and%20Answers%20with%20Examples."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://careerkarma.com/blog/how-to-learn-python/#:~:text=No%2C%20it%20(%20Python%20programming%20language%20),for%20learning%20Python%20programming%2C%20even%20for%20free."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://kinsta.com/blog/python-tutorials/#:~:text=In%20addition%2C%20you%20can%20use%20their%20(,by%20yourself%20and%20then%20view%20the%20results."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://realpython.com/python-idle/#:~:text=Python%20IDLE%20is%20Python's%20default%20integrated%20development,to%20write%2C%20edit%2C%20and%20execute%20Python%20code."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/groups/selftaughtprogrammers/posts/919833915047010/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -519,7 +470,7 @@
{
"cite": "https://www.pythoninstitute.org",
"cmpt_rank": 11,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 25,
@@ -533,14 +484,17 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [
- "Python",
- "JavaScript",
- "Computer programming",
- "Java",
- "C++",
- "PHP"
- ],
+ "details": {
+ "items": [
+ "Python",
+ "JavaScript",
+ "Computer programming",
+ "Java",
+ "C++",
+ "PHP"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 26,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[6e401e618433].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[6e401e618433].json
index 7de280b..d0ac5f8 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[6e401e618433].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[6e401e618433].json
@@ -14,38 +14,31 @@
{
"cite": "https://www.expedia.com/",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Round Trip Flights",
- "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypeflight%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "One-Way as Low as $27",
- "url": "/aclk?sa=L&pf=1&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAEQDxoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYASAFEgLJG_D_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_0uSRwNzYkLfUeyLVUPmUa0kP8MhA&adurl=https://www.expedia.com/Cheap-Flights-To-New-York.d178293.Travel-Guide-Flights%3Flocale%3Den_US%26siteid%3D1%26semcid%3DUS.MULTILOBF.GOOGLE.DT-c-EN.FLIGHT%26semdtl%3Da119825426710.b1146786747506.g1kwd-28177180.e1c.m1EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYASAFEgLJG_D_BwE.r1.c1.j19031967.k1.d1652478949003.h1e.i1.l1.n1.o1.p1.q1.s1cheap%2520flights%2520to%2520new%2520york.t1.x1.f1.u1.v1.w1%26gad_source%3D1%26gad_campaignid%3D19825426710%26gbraid%3D0AAAAACTxZ9ZlCUFi7eALxavi028qKk-p3%26gclid%3DEAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYASAFEgLJG_D_BwE&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Last Minute Flight Deals",
- "url": "https://www.expedia.com/Cheap-Flights-To-New-York.d178293.Travel-Guide-Flights?locale=en_US&siteid=1"
- },
- {
- "misc": {},
- "text": "",
- "title": "Package Deals",
- "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypepackage%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Hotel Deals",
- "url": "https://www.expedia.com/Hotel-Search?regionId=%7B_regionid%7D&locale=en_US&siteid=1&sort=PRICE_LOW_TO_HIGH&useRewards=false&sl=hdl"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Round Trip Flights",
+ "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypeflight%7D"
+ },
+ {
+ "title": "One-Way as Low as $27",
+ "url": "/aclk?sa=L&pf=1&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAEQDxoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYASAFEgLJG_D_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_0uSRwNzYkLfUeyLVUPmUa0kP8MhA&adurl=https://www.expedia.com/Cheap-Flights-To-New-York.d178293.Travel-Guide-Flights%3Flocale%3Den_US%26siteid%3D1%26semcid%3DUS.MULTILOBF.GOOGLE.DT-c-EN.FLIGHT%26semdtl%3Da119825426710.b1146786747506.g1kwd-28177180.e1c.m1EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYASAFEgLJG_D_BwE.r1.c1.j19031967.k1.d1652478949003.h1e.i1.l1.n1.o1.p1.q1.s1cheap%2520flights%2520to%2520new%2520york.t1.x1.f1.u1.v1.w1%26gad_source%3D1%26gad_campaignid%3D19825426710%26gbraid%3D0AAAAACTxZ9ZlCUFi7eALxavi028qKk-p3%26gclid%3DEAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYASAFEgLJG_D_BwE&q="
+ },
+ {
+ "title": "Last Minute Flight Deals",
+ "url": "https://www.expedia.com/Cheap-Flights-To-New-York.d178293.Travel-Guide-Flights?locale=en_US&siteid=1"
+ },
+ {
+ "title": "Package Deals",
+ "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypepackage%7D"
+ },
+ {
+ "title": "Hotel Deals",
+ "url": "https://www.expedia.com/Hotel-Search?regionId=%7B_regionid%7D&locale=en_US&siteid=1&sort=PRICE_LOW_TO_HIGH&useRewards=false&sl=hdl"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 0,
@@ -59,32 +52,27 @@
{
"cite": "https://www.priceline.com",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Deals on Round Trip Flights",
- "url": "https://www.priceline.com/r/?product=air&theme=citystate&lp=y&match=%7BMatchType%7D&kw=%7Bkeyword%7D&adp=%7Badposition%7D&destination=LGA&refid=PLGOOGLECPC&refclickid=D:%7BDevice%7DFlight16163556670%7BNetwork%7D%7BCreative%7D%7Bcampaignid%7D%7Btargetid%7D%7C%7Bloc_physical_ms%7D%7C%7Badposition%7D%7Bignore%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Book Cheap Round Trip Flights",
- "url": "https://www.priceline.com/r/?product=air&theme=citystate&lp=y&match=%7BMatchType%7D&kw=%7Bkeyword%7D&adp=%7Badposition%7D&destination=LGA&refid=PLGOOGLECPC&refclickid=D:%7BDevice%7DFlight16163556670%7BNetwork%7D%7BCreative%7D%7Bcampaignid%7D%7Btargetid%7D%7C%7Bloc_physical_ms%7D%7C%7Badposition%7D%7Bignore%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Last Minute Flights",
- "url": "https://www.priceline.com/r/?product=ASL&theme=SL&SLID=10026&semprod=air&refid=PLGOOGLECPCSL&refclickid=D:%7Bdevice%7D%7C%7BCampaignId%7D%7C%7BCreative%7D%7C%7BKeyword%7D%7C%7BMatchtype%7D%7C%7BNetwork%7D%7C%7Btargetid%7D%7C%7Bloc_physical_ms%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Express Deals®",
- "url": "https://www.priceline.com/r/?product=APSL&theme=SL&SLID=10009&semprod=air&refid=PLGOOGLECPCSL&refclickid=D:%7Bdevice%7D%7C%7BCampaignId%7D%7C%7BCreative%7D%7C%7BKeyword%7D%7C%7BMatchtype%7D%7C%7BNetwork%7D%7C%7Btargetid%7D%7C%7Bloc_physical_ms%7D"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Deals on Round Trip Flights",
+ "url": "https://www.priceline.com/r/?product=air&theme=citystate&lp=y&match=%7BMatchType%7D&kw=%7Bkeyword%7D&adp=%7Badposition%7D&destination=LGA&refid=PLGOOGLECPC&refclickid=D:%7BDevice%7DFlight16163556670%7BNetwork%7D%7BCreative%7D%7Bcampaignid%7D%7Btargetid%7D%7C%7Bloc_physical_ms%7D%7C%7Badposition%7D%7Bignore%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ },
+ {
+ "title": "Book Cheap Round Trip Flights",
+ "url": "https://www.priceline.com/r/?product=air&theme=citystate&lp=y&match=%7BMatchType%7D&kw=%7Bkeyword%7D&adp=%7Badposition%7D&destination=LGA&refid=PLGOOGLECPC&refclickid=D:%7BDevice%7DFlight16163556670%7BNetwork%7D%7BCreative%7D%7Bcampaignid%7D%7Btargetid%7D%7C%7Bloc_physical_ms%7D%7C%7Badposition%7D%7Bignore%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ },
+ {
+ "title": "Last Minute Flights",
+ "url": "https://www.priceline.com/r/?product=ASL&theme=SL&SLID=10026&semprod=air&refid=PLGOOGLECPCSL&refclickid=D:%7Bdevice%7D%7C%7BCampaignId%7D%7C%7BCreative%7D%7C%7BKeyword%7D%7C%7BMatchtype%7D%7C%7BNetwork%7D%7C%7Btargetid%7D%7C%7Bloc_physical_ms%7D"
+ },
+ {
+ "title": "Express Deals®",
+ "url": "https://www.priceline.com/r/?product=APSL&theme=SL&SLID=10009&semprod=air&refid=PLGOOGLECPCSL&refclickid=D:%7Bdevice%7D%7C%7BCampaignId%7D%7C%7BCreative%7D%7C%7BKeyword%7D%7C%7BMatchtype%7D%7C%7BNetwork%7D%7C%7Btargetid%7D%7C%7Bloc_physical_ms%7D"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -98,32 +86,27 @@
{
"cite": "https://www.kayak.com",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Flight deals to New York",
- "url": "https://www.kayak.com/semi/adwordssearch/flight_destination/15830/en.html?"
- },
- {
- "misc": {},
- "text": "",
- "title": "Find our cheapest flights",
- "url": "https://www.kayak.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1230&"
- },
- {
- "misc": {},
- "text": "",
- "title": "Flights to New York",
- "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAEQDBoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYAyAEEgIh4vD_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_0hKW2YfFrWWfOczsD8xLSSV0Zd6Q&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "One-way flights",
- "url": "https://www.kayak.com/semi/adwordssearch/flight_general/any/en.html?cp_ft=ow&tags=m:658&"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Flight deals to New York",
+ "url": "https://www.kayak.com/semi/adwordssearch/flight_destination/15830/en.html?"
+ },
+ {
+ "title": "Find our cheapest flights",
+ "url": "https://www.kayak.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1230&"
+ },
+ {
+ "title": "Flights to New York",
+ "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAEQDBoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYAyAEEgIh4vD_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_0hKW2YfFrWWfOczsD8xLSSV0Zd6Q&adurl=&q="
+ },
+ {
+ "title": "One-way flights",
+ "url": "https://www.kayak.com/semi/adwordssearch/flight_general/any/en.html?cp_ft=ow&tags=m:658&"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -137,32 +120,27 @@
{
"cite": "https://www.southwest.com",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Shop Our Lowest Ticket Prices",
- "url": "https://www.southwest.com/en/flights/flights-to-new-york"
- },
- {
- "misc": {},
- "text": "",
- "title": "Find Best Flight Deals",
- "url": "https://www.southwest.com/en/flights/flight-deals"
- },
- {
- "misc": {},
- "text": "",
- "title": "Shop Our Low Fares Today",
- "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAEQFBoCbHI&ae=2&aspm=1&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYBCAEEgJJh_D_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_35&sig=AOD64_0zJc4VHdbTnGf3Wr1MeWK0MyiDFA&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Featured Offers",
- "url": "http://www.southwest.com/special-offers/featured-offers/"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Shop Our Lowest Ticket Prices",
+ "url": "https://www.southwest.com/en/flights/flights-to-new-york"
+ },
+ {
+ "title": "Find Best Flight Deals",
+ "url": "https://www.southwest.com/en/flights/flight-deals"
+ },
+ {
+ "title": "Shop Our Low Fares Today",
+ "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAEQFBoCbHI&ae=2&aspm=1&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAAYBCAEEgJJh_D_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_35&sig=AOD64_0zJc4VHdbTnGf3Wr1MeWK0MyiDFA&adurl=&q="
+ },
+ {
+ "title": "Featured Offers",
+ "url": "http://www.southwest.com/special-offers/featured-offers/"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 3,
@@ -344,7 +322,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 16,
@@ -358,38 +336,31 @@
{
"cite": "https://www.cheapflights.com",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Flight deals to New York",
- "url": "https://www.cheapflights.com/semi/adwordssearch/flight_destination/15830/en.html?"
- },
- {
- "misc": {},
- "text": "",
- "title": "Find our cheapest flights",
- "url": "https://www.cheapflights.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1230&"
- },
- {
- "misc": {},
- "text": "",
- "title": "Flights to New York",
- "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAIQChoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAMYASAFEgJyCvD_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_0a3o4i9nMWdYUK3IfzMtOU6fbq8Q&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "One-way flights",
- "url": "https://www.cheapflights.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1126&"
- },
- {
- "misc": {},
- "text": "",
- "title": "Fly today",
- "url": "https://www.cheapflights.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1228&"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Flight deals to New York",
+ "url": "https://www.cheapflights.com/semi/adwordssearch/flight_destination/15830/en.html?"
+ },
+ {
+ "title": "Find our cheapest flights",
+ "url": "https://www.cheapflights.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1230&"
+ },
+ {
+ "title": "Flights to New York",
+ "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAIQChoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAMYASAFEgJyCvD_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_0a3o4i9nMWdYUK3IfzMtOU6fbq8Q&adurl=&q="
+ },
+ {
+ "title": "One-way flights",
+ "url": "https://www.cheapflights.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1126&"
+ },
+ {
+ "title": "Fly today",
+ "url": "https://www.cheapflights.com/semi/adwordssearch/flight_general/any/en.html?tags=m:1228&"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 17,
@@ -403,38 +374,31 @@
{
"cite": "https://www.expedia.com/",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Round Trip Flights",
- "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypeflight%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "One-Way as Low as $27",
- "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAIQCxoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAMYAiAFEgIksPD_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_1frYAxcSpBOBMc7qMc5wSr_9KD5w&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Last Minute Flight Deals",
- "url": "https://www.expedia.com/Cheap-Flights-To-New-York.d178293.Travel-Guide-Flights?locale=en_US&siteid=1"
- },
- {
- "misc": {},
- "text": "",
- "title": "Package Deals",
- "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypepackage%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Hotel Deals",
- "url": "https://www.expedia.com/Hotel-Search?regionId=%7B_regionid%7D&locale=en_US&siteid=1&sort=PRICE_LOW_TO_HIGH&useRewards=false&sl=hdl"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Round Trip Flights",
+ "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypeflight%7D"
+ },
+ {
+ "title": "One-Way as Low as $27",
+ "url": "/aclk?sa=L&ai=DChsSEwiow6mMhsaSAxWeU5EFHZWNCrgYACICCAIQCxoCbHI&co=1&ase=2&gclid=EAIaIQobChMIqMOpjIbGkgMVnlORBR2VjQq4EAMYAiAFEgIksPD_BwE&cid=CAASugHkaFfijjxOC89cbjyQKg4HDrjTTPOy1b_5mJoWjHESV3WCutnQZQdM0uhJ_FxMi08weopog1m7PG66QEYEpTsVjd-SMZjFGJ6Pilix2ntNr0vJpW_IBzFv0SgOyH6V5tZo6D9iZVvJ4HZCG6l5xMI7VMppQMNlN_JKvHnQfoJtNxqBTCq6U6RAf8XYXVT7eSzNoVHdkBfeZ3P-QaphNoZtgFTePcsRSwpy25fF5sXE3u02lPU69Wr5hmM&cce=2&category=acrcp_v1_32&sig=AOD64_1frYAxcSpBOBMc7qMc5wSr_9KD5w&adurl=&q="
+ },
+ {
+ "title": "Last Minute Flight Deals",
+ "url": "https://www.expedia.com/Cheap-Flights-To-New-York.d178293.Travel-Guide-Flights?locale=en_US&siteid=1"
+ },
+ {
+ "title": "Package Deals",
+ "url": "https://www.expedia.com/%7B_destprefix%7D%7B_regionid%7D%7B_pagetypepackage%7D"
+ },
+ {
+ "title": "Hotel Deals",
+ "url": "https://www.expedia.com/Hotel-Search?regionId=%7B_regionid%7D&locale=en_US&siteid=1&sort=PRICE_LOW_TO_HIGH&useRewards=false&sl=hdl"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 18,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[7049404a2dd6].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[7049404a2dd6].json
index 03ee9ec..b2fc495 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[7049404a2dd6].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[7049404a2dd6].json
@@ -17,65 +17,46 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&google_abuse=GOOGLE_ABUSE_EXEMPTION%3DID%3D5dafb1175885f93b%3ATM%3D1768890719%3AC%3D%3E%3AIP%3D108.247.126.23-%3AS%3DO1MBTYyklSBJpDcySqRlQw%3B+path%3D%2F%3B+domain%3Dgoogle.com%3B+expires%3DTue%2C+20-Jan-2026+09%3A31%3A59+GMT&sei=YCFvaebyELOHptQP-7PO2Q0&ved=2ahUKEwjYlca4v5mSAxXBmYkEHXMpKecQgK4QegYIAAgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://spaceplace.nasa.gov/blue-sky/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue#:~:text=The%20sky%20appears%20blue%20because%20of%20the,we%20can%20see%20look%20blue%20or%20violet**"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.montrealsciencecentre.com/blog/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.rmg.co.uk/stories/space-astronomy/why-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -93,12 +74,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "How to explain to a kid why the sky is blue?",
- "Why the sky looks bluer in fall and winter - WSAV-TV WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo... WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo...",
- "Why is the sky Blue? desy.de https://www.desy.de › user › projects › Physics › General desy.de https://www.desy.de › user › projects › Physics › General"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "How to explain to a kid why the sky is blue?",
+ "Why the sky looks bluer in fall and winter - WSAV-TV WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo... WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo...",
+ "Why is the sky Blue? desy.de https://www.desy.de › user › projects › Physics › General desy.de https://www.desy.de › user › projects › Physics › General"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -352,7 +336,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -369,7 +354,8 @@
"cmpt_rank": 9,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -386,7 +372,8 @@
"cmpt_rank": 10,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -415,7 +402,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 23,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[7333536d2911].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[7333536d2911].json
index 7cf2e29..ca3d909 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[7333536d2911].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[7333536d2911].json
@@ -17,83 +17,58 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Chlorophyll Degradation",
- "title": "",
"url": "https://www.google.com/search?q=Chlorophyll+Degradation&sei=EzGGadziB-2i0PEP7eiguAU&ved=2ahUKEwiOiNrSvcWSAxVbePUHHYrbKukQgK4QegYIAQgAEAo"
},
{
- "misc": {},
"text": "Ethylene Gas",
- "title": "",
"url": "https://www.google.com/search?q=Ethylene+Gas&sei=EzGGadziB-2i0PEP7eiguAU&ved=2ahUKEwiOiNrSvcWSAxVbePUHHYrbKukQgK4QegYIAQgAEAw"
},
{
- "misc": {},
"text": "Visual Recognition",
- "title": "",
"url": "https://www.google.com/search?q=Visual+Recognition&sei=EzGGadziB-2i0PEP7eiguAU&ved=2ahUKEwiOiNrSvcWSAxVbePUHHYrbKukQgK4QegYIAQgCEAE"
},
{
- "misc": {},
"text": "Ripening Process",
- "title": "",
"url": "https://www.google.com/search?q=Ripening+Process&sei=EzGGadziB-2i0PEP7eiguAU&ved=2ahUKEwiOiNrSvcWSAxVbePUHHYrbKukQgK4QegYIAQgCEAM"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=0WCErY3OYng"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.scienceabc.com/nature/bananas-change-colour-upon-ripening.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://pekoproduce.com/blogs/produce-nutrition/green-to-yellow-to-spotty-how-do-bananas-ripen"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.quora.com/Why-are-bananas-yellow"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.ck12.org/flexi/physical-science/Light-in-Physics/what-makes-bananas-yellow/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://pace.oceansciences.org/color_determination.cgi#:~:text=A%20banana%20appears%20yellow%20to%20the%20human,our%20brain%20recognizes%20as%20a%20%22YELLOW%20BANANA!%22%22"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://pace.oceansciences.org/color_determination.cgi"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -237,12 +212,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Why are bananas yellow? : r/answers - Reddit Reddit https://www.reddit.com › answers › comments › why_a... Reddit https://www.reddit.com › answers › comments › why_a...",
- "Cavendish banana - Wikipedia Wikipedia https://en.wikipedia.org › wiki › Cavendish_banana Wikipedia https://en.wikipedia.org › wiki › Cavendish_banana",
- "Going Bananas: Your Complete Ripe Banana Guide - The FruitGuys The FruitGuys https://fruitguys.com › blog › going-bananas The FruitGuys https://fruitguys.com › blog › going-bananas",
- "What color is the healthiest banana?"
- ],
+ "details": {
+ "items": [
+ "Why are bananas yellow? : r/answers - Reddit Reddit https://www.reddit.com › answers › comments › why_a... Reddit https://www.reddit.com › answers › comments › why_a...",
+ "Cavendish banana - Wikipedia Wikipedia https://en.wikipedia.org › wiki › Cavendish_banana Wikipedia https://en.wikipedia.org › wiki › Cavendish_banana",
+ "Going Bananas: Your Complete Ripe Banana Guide - The FruitGuys The FruitGuys https://fruitguys.com › blog › going-bananas The FruitGuys https://fruitguys.com › blog › going-bananas",
+ "What color is the healthiest banana?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 10,
@@ -342,7 +320,8 @@
"cmpt_rank": 9,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -385,7 +364,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 20,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[7ad9715f3597].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[7ad9715f3597].json
index 4d491fc..3800636 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[7ad9715f3597].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[7ad9715f3597].json
@@ -18,11 +18,10 @@
"heading": null,
"img_url": null,
"text": "Feedback",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "More info",
- "title": "",
"url": "https://support.google.com/websearch/answer/3284611?hl=en#unitconverter"
}
]
@@ -40,12 +39,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "What is 100 degrees Fahrenheit in Celsius? [Solved] - Cuemath Cuemath https://www.cuemath.com › questions › what-is-100-deg... Cuemath https://www.cuemath.com › questions › what-is-100-deg...",
- "What is 100F in Celsius fever?",
- "How do you convert F to C easily?",
- "How much is 110 in Celsius?"
- ],
+ "details": {
+ "items": [
+ "What is 100 degrees Fahrenheit in Celsius? [Solved] - Cuemath Cuemath https://www.cuemath.com › questions › what-is-100-deg... Cuemath https://www.cuemath.com › questions › what-is-100-deg...",
+ "What is 100F in Celsius fever?",
+ "How do you convert F to C easily?",
+ "How much is 110 in Celsius?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -241,7 +243,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 15,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[7b89c00120e3].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[7b89c00120e3].json
index 2de2ef8..261819b 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[7b89c00120e3].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[7b89c00120e3].json
@@ -31,71 +31,50 @@
"details": {
"heading": "Results for Palo Alto, CA 94301",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.google.com/intl/en_us/googlefinance/disclaimer"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://seekingalpha.com/news/4548563-apple-heads-to-moon-after-nasa-chief-approves-iphones-for-upcoming-missions#:~:text=@javkoza%20In%20August%202025%2C%20Apple,Winning."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.inc.com/ava-levinson/tim-cook-apple-employees-immigration/91298666#:~:text=The%20Apple%20CEO%20pledged%20to,promise%20of%20calling%20for%20change."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.gurufocus.com/news/8591738/apple-aapl-sees-increased-options-activity-amid-bullish-sentiment#:~:text=Apple's%20iPhone%20makes%20up%20a,build%20its%20products%20and%20chips."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/money/Apple-Inc#:~:text=Encyclop%C3%A6dia%20Britannica%2C%20Inc.-,Key%20products%20and%20services,The%20genesis%20of%20Apple%20Inc."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://finance.yahoo.com/quote/AAPL/profile/#:~:text=In%20addition%2C%20the%20company%20offers,:%201;%20Compensation:%203."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.ebsco.com/research-starters/computer-science/apple-inc#:~:text=In%202007%2C%20Apple%20Computer%20changed,marketplaces%20to%20support%20these%20devices."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.tradingview.com/symbols/NASDAQ-AAPL/#:~:text=Apple%2C%20Inc%20engages%20in%20the,is%20headquartered%20in%20Cupertino%2C%20CA."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.globaldata.com/company-profile/apple-inc/#:~:text=Apple%20Inc%20(Apple)%20designs%2C,Cupertino%2C%20California%2C%20the%20US."
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -141,12 +120,15 @@
{
"cite": null,
"cmpt_rank": 4,
- "details": [
- "What if I invested $1000 in Apple in 1997?",
- "What is the Apple Inc?",
- "Apple Inc. - Wikipedia Wikipedia https://en.wikipedia.org › wiki › Apple_Inc Wikipedia https://en.wikipedia.org › wiki › Apple_Inc",
- "What does inc stand for in Apple?"
- ],
+ "details": {
+ "items": [
+ "What if I invested $1000 in Apple in 1997?",
+ "What is the Apple Inc?",
+ "Apple Inc. - Wikipedia Wikipedia https://en.wikipedia.org › wiki › Apple_Inc Wikipedia https://en.wikipedia.org › wiki › Apple_Inc",
+ "What does inc stand for in Apple?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 4,
@@ -162,6 +144,7 @@
"cmpt_rank": 5,
"details": {
"directions": "/maps/dir//Apple+Inc.,+1070+E+Arques+Ave,+Sunnyvale,+CA+94085/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fb6196e0a7497:0xc4162eb5b073a9e4?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://apple.com/"
},
"error": null,
@@ -179,6 +162,7 @@
"cmpt_rank": 5,
"details": {
"directions": "/maps/dir//Apple,+10500+N+De+Anza+Blvd,+Cupertino,+CA+95014/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fb5b13fdcecf7:0xdc5c21978a3d43f5?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "http://apple.com/"
},
"error": null,
@@ -196,6 +180,7 @@
"cmpt_rank": 5,
"details": {
"directions": "/maps/dir//Apple+Palo+Alto,+340+University+Ave,+Palo+Alto,+CA+94301/data=!4m6!4m5!1m1!4e2!1m2!1m1!1s0x808fbb38fa3f36f9:0xf5692002b984147?sa=X&ved=1t:57443&ictx=111",
+ "type": "ratings",
"website": "https://www.apple.com/retail/paloalto?cid=aos-us-seo-maps"
},
"error": null,
@@ -281,7 +266,7 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 13,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[7d76d3a83ebc].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[7d76d3a83ebc].json
index 37860ac..31cca86 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[7d76d3a83ebc].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[7d76d3a83ebc].json
@@ -17,35 +17,26 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=bd16726c6a722966&q=donald+trump&udm=2&source=univ&sa=X&ved=2ahUKEwjLqraipMSSAxVrSjABHWpuH8oQnN8JegQIFhAD"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://x.com/realDonaldTrump"
},
{
- "misc": {},
"text": "Age|79 years|Jun 14, 1946",
- "title": "",
"url": "/search?sca_esv=bd16726c6a722966&q=donald+trump+age&stick=H4sIAAAAAAAAAOPgE-LUz9U3SC4ssTTQEs1OttIvSM0vyEkFUkXF-XlWiempi1gFUvLzEnNSFEqKSnMLFIBCAMqYnZc3AAAA&sa=X&ved=2ahUKEwjLqraipMSSAxVrSjABHWpuH8oQ18AJegQILhAB"
},
{
- "misc": {},
"text": "Party|Republican Party",
- "title": "",
"url": "/search?sca_esv=bd16726c6a722966&q=Republican+Party&si=AL3DRZHmwLjWhgnaPB3UTu10R6S5qNLXiQiKMeezfKyB1FMsRrUdcsLX7-nPSpyKZYIqfOJDBGOdz2E57-99eIANe1Pl8wwO7G3XqVt3qiEO4uERO4KYYXvGXEXRT6jOHlGcixAxcLt6zGSb8XfSKPYd5JeDPjiWx1lTb4E4mlK9t2l0ky4-1M_qoJaYHMURBGCmYARFqYnA&sa=X&ved=2ahUKEwjLqraipMSSAxVrSjABHWpuH8oQ18AJegQILxAB"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/DonaldTrump/posts/1470862751066207/"
}
]
@@ -553,7 +544,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 36,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[811a27f92284].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[811a27f92284].json
index ad1a6e0..abb4c1d 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[811a27f92284].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[811a27f92284].json
@@ -17,41 +17,30 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift&udm=2&source=univ&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQnN8JegQIFhAD"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.instagram.com/p/DUatriyDtst/"
},
{
- "misc": {},
"text": "Age|36 years|Dec 13, 1989",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+age&stick=H4sIAAAAAAAAAOPgE-LUz9U3SMkxNTPXEs1OttIvSM0vyEkFUkXF-XlWiempi1gFShIrc_KLFIrLM9NKFIBCAP7tyqs3AAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQ18AJegQIShAB"
},
{
- "misc": {},
"text": "Andrea Swift",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Andrea+Swift&si=AL3DRZGDMkmBg1SB5TH8o8Xeh03tgmwpgZCgiYi5BFB_ELNOTPBFeqDxNTGvvh1hPYXqlIjF22dcJi0Yu82oPUXxzDxir3wgkS2kyfPjT2d9orDXslkUkYK7OzdfWkB9RAz2F_nOw82Mcd9TUz4zCy-FAEYoaE-hz5K4tA2aFCRTu5jxMqrcqe0UCeKV32Yg3OqTVUTYbySC8oV8Fb1UcCnkzjmIJqsSepRL_ufkeJOR2ceTk-kWOaadcgzyxvzKtWxA5pEeojnijaM46J9b121z4q7bG8JZ_g%3D%3D&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQkNQMegQISxAH"
},
{
- "misc": {},
"text": "Scott Kingsley Swift",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Scott+Kingsley+Swift&si=AL3DRZGDMkmBg1SB5TH8o8Xeh03tgmwpgZCgiYi5BFB_ELNOTN6vRwy0BG2MW1rZPSjwgUj9uQ6Lqjf5EvnuYmUprBeVRcqZwU4RVGuga04tF6dV3uws1opDOSWuZ0Odx3tQnYEe4FMjCJ2I27DEVmFgfY83IaI2WH1C4h50MtXKKKlT0Zf0oZLiK2dlMADELL-GyPYFihLU-CQL9ZyUtLiTTNIl4a90jLtKCk_iqmof4Ir7GNPxeV_Trk26L47oLJKzt61AKGxX-grhVcBBLU4TkU5pbd5NuA%3D%3D&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQkNQMegQISxAK"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.taylorswift.com/"
}
]
@@ -184,53 +173,38 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": " |Songs| ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+songs&si=AL3DRZEDa4TxC6hVrAyLaFsBpat_oYcv1YoQ-SV-BLdEfYRU2jFQCELiakCARJTIHJU96QYMshrO7PGgLVU7zkfOiqhB9BTnw_9l4buf5_RYB3fSdtPSlXVuu0e9rN0pBAbDuMoat2iq&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQw_oBegQIdRAB&ictx=1"
},
{
- "misc": {},
"text": "Cruel Summer|Lover|·|2019",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+cruel+summer&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0TMuNzzU2LDbREvQtLc5MdiwqySwuCckPzs9LX8QqWZJYmZNfpFBcnplWopBcVJqao1BcmpubWgQAAOToVksAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQri56BAhcEAQ"
},
{
- "misc": {},
"text": "Shake it Off|1989|·|2014",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+shake+it+off&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0TKqKj88ys0zSEvQtLc5MdiwqySwuCckPzs9LX8QqWZJYmZNfpFBcnplWolCckZidqpBZopCflgYA4-3GsksAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQri56BAhcEAc"
},
{
- "misc": {},
"text": "Blank Space|1989|·|2014",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+blank+space&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0TDI3s7SsMjHTEvQtLc5MdiwqySwuCckPzs9LX8QqUZJYmZNfpFBcnplWopCUk5iXrVBckJicCgD5m87kSgAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQri56BAhcEAo"
},
{
- "misc": {},
"text": "Love Story|Fearless|·|2008",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+love+story&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tFP1zcsNM0yKbNML9AS9C0tzkx2LCrJLC4JyQ_Oz0tfxCpekliZk1-kUFyemVaikJNflqpQXJJfVAkAimGdE0gAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQri56BAhcEA0"
},
{
- "misc": {},
"text": "Love Story|Fearless (Taylor's Version)|·|2021",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+love+story&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0TDMyM4s3MCjQEvQtLc5MdiwqySwuCckPzs9LX8QqXpJYmZNfpFBcnplWopCTX5aqUFySX1QJAK_7p0pJAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQri56BAhcEBA"
},
{
- "misc": {},
"text": "Lover|Lover|·|2019",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+lover&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0zKiqSMsyyi7WEvQtLc5MdiwqySwuCckPzs9LX8QqVJJYmZNfpFBcnplWopCTX5ZaBABm9t5pRAAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQri56BAhcEBM"
},
{
- "misc": {},
"text": "View 20+ more",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+songs&si=AL3DRZEDa4TxC6hVrAyLaFsBpat_oYcv1YoQ-SV-BLdEfYRU2jFQCELiakCARJTIHJU96QYMshrO7PGgLVU7zkfOiqhB9BTnw_9l4buf5_RYB3fSdtPSlXVuu0e9rN0pBAbDuMoat2iq&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQxYkLKAF6BAhcEBU"
}
]
@@ -724,12 +698,15 @@
{
"cite": null,
"cmpt_rank": 6,
- "details": [
- "What health concerns does Taylor Swift have?",
- "Who is Taylor Swift getting married to?",
- "What kind of car does Taylor Swift drive?",
- "Why is October 3rd important to Taylor Swift?"
- ],
+ "details": {
+ "items": [
+ "What health concerns does Taylor Swift have?",
+ "Who is Taylor Swift getting married to?",
+ "What kind of car does Taylor Swift drive?",
+ "Why is October 3rd important to Taylor Swift?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 44,
@@ -956,71 +933,50 @@
"details": {
"heading": "The Life of a Showgirl",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": " |Albums| ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+album&si=AL3DRZH23Fro5hVHuxACFfZLtAUEjrP1mvS5IXc-sdo0mUIKqjYGyETUh3cmJXIRnkWjV4eSinMhIZvNahoMx3TmbqHWiPDqJQAkUBrYBfYIXKzL8CCSYxLkusCo6lIhIkyrNprXkdS8&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQw_oBegQIfhAB&ictx=1"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+The+Life+of+a+Showgirl&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0rMwoTrLMNsrWEs9OttLPLS3OTNZPLCrJLC6xSsxJKs0tXsSqHJJYmZNfpBBcnplWohCSkargk5mWqpCfppCoEJyRX56eWZQDANnkUZRbAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAG"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+Lover&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0TMspLjI3y8nSEs9OttLPLS3OTNZPLCrJLC6xSsxJKs0tXsQqFJJYmZNfpBBcnplWouCTX5ZaBAAEtqTTSgAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAM"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+The+Tortured+Poets+Department&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0zEmxNM2yzKjUEs9OttLPLS3OTNZPLCrJLC6xSsxJKs0tXsSqFZJYmZNfpBBcnplWohCSkaoQkl9UUlqUmqIQkJ9aUqzgkloA1JKbmlcCAOaU815iAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAS"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+1989&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4gYxDQ1zcrMN8rTEs5Ot9HNLizOT9ROLSjKLS6wSc5JKc4sXsQqGJFbm5BcpBJdnppUoGFpaWAIAvFC_3EcAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+reputation&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0TDMwy07PNTfTEs9OttLPLS3OTNZPLCrJLC6xSsxJKs0tXsQqHpJYmZNfpBBcnplWolCUWlBakliSmZ8HAG6csxdPAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAe"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+Red&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4gIxcyxyM8wrtcSzk630c0uLM5P1E4tKMotLrBJzkkpzixexCoQkVubkFykEl2emlSgEpaYAAJe856FFAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAk"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+Midnights&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0LDa3SLI0zTLREs9OttLPLS3OTNZPLCrJLC6xSsxJKs0tXsQqFpJYmZNfpBBcnplWouCbmZKXmZ5RUgwAMbtWt04AAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAq"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+folklore&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4tVP1zc0zDbNqsgqNzTSEs9OttLPLS3OTNZPLCrJLC6xSsxJKs0tXsQqGpJYmZNfpBBcnplWopCWn5MN5KUCAOK10xlNAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhAw"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=Taylor+Swift+Fearless&stick=H4sIAAAAAAAAAONgFuLUz9U3SMkxNTNX4gIxTdJziiqytMSzk630c0uLM5P1E4tKMotLrBJzkkpzixexioYkVubkFykEl2emlSi4pSYW5aQWFwMAQQ09x0oAAAA&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQgOQBegQIZhA2"
},
{
- "misc": {},
"text": "See more",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=taylor+swift+album&si=AL3DRZH23Fro5hVHuxACFfZLtAUEjrP1mvS5IXc-sdo0mUIKqjYGyETUh3cmJXIRnkWjV4eSinMhIZvNahoMx3TmbqHWiPDqJQAkUBrYBfYIXKzL8CCSYxLkusCo6lIhIkyrNprXkdS8&sa=X&ved=2ahUKEwiJubvRiMaSAxVvRDABHSVKB6wQ9JYCegQIZhA6&ictx=1"
}
]
@@ -1094,7 +1050,7 @@
{
"cite": null,
"cmpt_rank": 16,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 65,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[83b17a6a7750].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[83b17a6a7750].json
index b209bec..c3c8c04 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[83b17a6a7750].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[83b17a6a7750].json
@@ -196,7 +196,7 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 13,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[8d1b75b71e7f].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[8d1b75b71e7f].json
index 11f529f..3568390 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[8d1b75b71e7f].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[8d1b75b71e7f].json
@@ -18,29 +18,22 @@
"heading": "Choose what you’re giving feedback on",
"img_url": null,
"text": "ser·en·dip·i·ty / ˌserənˈdipədē / Learn to pronounce noun noun : serendipity ; plural noun : serendipities the occurrence and development of events by chance in a happy or beneficial way. \"a fortunate stroke of serendipity\" h Similar: chance happy chance accident happy accident fluke luck good luck good fortune fortuity fortuitousness providence coincidence happy coincidence Origin 1754: coined by Horace Walpole, suggested by The Three Princes of Serendip , the title of a fairy tale in which the heroes ‘were always making discoveries, by accidents and sagacity, of things they were not in quest of’.",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Oxford Languages",
- "title": "",
"url": "https://languages.oup.com/google-dictionary-en"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch/answer/10106608?hl=en"
},
{
- "misc": {},
"text": " | |Learn to pronounce| | ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=how+to+pronounce+serendipity&stick=H4sIAAAAAAAAAOMIfcRoyy3w8sc9YSmTSWtOXmPU4-INKMrPK81LzkwsyczPExLnYglJLcoV4pfi5eIuTi1KzUvJLMgsqbRiUWJKzeNZxCqTkV-uUJKvUADUlg_Ul6qApAoA1EtmuWEAAAA&pron_lang=en&pron_country=us&sa=X&ved=2ahUKEwjfno-KiMaSAxU4hKgCHbt1MXwQ3eEDegQIMRAN"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "http://books.google.com/ngrams/graph?year_start=1800&year_end=2019&corpus=26&smoothing=7&case_insensitive=on&content=serendipity"
}
]
@@ -58,12 +51,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "What is the true meaning of serendipity?",
- "What is an example of serendipity?",
- "What does serendipity mean – its importance for creativity and ... - Noisli Noisli https://www.noisli.com › blog › what-does-serendipity-m... Noisli https://www.noisli.com › blog › what-does-serendipity-m...",
- "What is the synonym of serendipity?"
- ],
+ "details": {
+ "items": [
+ "What is the true meaning of serendipity?",
+ "What is an example of serendipity?",
+ "What does serendipity mean – its importance for creativity and ... - Noisli Noisli https://www.noisli.com › blog › what-does-serendipity-m... Noisli https://www.noisli.com › blog › what-does-serendipity-m...",
+ "What is the synonym of serendipity?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -415,7 +411,8 @@
"cmpt_rank": 11,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -444,7 +441,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 28,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[8e820f7b024f].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[8e820f7b024f].json
index 3239f85..5a79297 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[8e820f7b024f].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[8e820f7b024f].json
@@ -18,23 +18,18 @@
"heading": "Choose what you’re giving feedback on",
"img_url": null,
"text": "English - detected|English - detected|French|good morning|ˌɡo͝od ˈmôrniNG| Learn to pronounce | Learn to pronounce |Learn to pronounce|bonjour|. . .|.|.|.|. . .|.|.|.| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translation|. . .|.|.|.|(feminine)| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translation|. . .|.|.|.|(masculine)| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translations of |Recent languages French All languages Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern 
Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx 
Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Detect language Recent languages All languages Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala 
Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok 
Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu| Show more | Show more |Show less|Show less|Can't update the translation right now. Try again later.|Can't update the translation right now. Try again later.|•|Feedback",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": " | |Learn to pronounce| | ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=how+to+pronounce+good+morning&stick=H4sIAAAAAAAAAOMIfcRoxy3w8sc9YSnTSWtOXmPU5-INKMrPK81LzkwsyczPE5LgYglJLcoVEpDi4-JJz89PUcjNL8rLzEu3YlFiSs3jWcQqm5FfrlCSr1AA1JcP1JiqgKwMAPb7gD5jAAAA&pron_lang=en&pron_country=US&sa=X&ved=2ahUKEwihnqOqicaSAxVZTDABHSE2MsoQ9tEEegQIERAS"
},
{
- "misc": {},
"text": " | |Learn to pronounce| | ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=comment+prononcer+bonjour&stick=H4sIAAAAAAAAAOMIfcRoyS3w8sc9YSmDSWtOXmPU4uINKMrPK81LzkwsyczPExLmYglJLcoV4pbi5GJPys_Lyi8tsmJRYkor4lnEKpmcn5ubmleiUADUk5-XnFqkAFUCADzLRU1aAAAA&pron_lang=fr&pron_country=US&sa=X&ved=2ahUKEwihnqOqicaSAxVZTDABHSE2MsoQ9tEEegQIERAZ"
},
{
- "misc": {},
"text": "Open in Google Translate",
- "title": "",
"url": "https://translate.google.com/?um=1&ie=UTF-8&hl=en&client=tw-ob#auto/fr/good+morning"
}
]
@@ -52,12 +47,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "15 Greetings in French - YouTube YouTube https://www.youtube.com · Rosetta Stone YouTube https://www.youtube.com · Rosetta Stone",
- "How to Pronounce Bon Matin (French) - YouTube YouTube https://www.youtube.com · Julien Miquel YouTube https://www.youtube.com · Julien Miquel",
- "Do the French say \"je ne sais quoi\"?",
- "How to Say \"Good Morning\" in French - YouTube YouTube https://www.youtube.com · Big Bong YouTube https://www.youtube.com · Big Bong"
- ],
+ "details": {
+ "items": [
+ "15 Greetings in French - YouTube YouTube https://www.youtube.com · Rosetta Stone YouTube https://www.youtube.com · Rosetta Stone",
+ "How to Pronounce Bon Matin (French) - YouTube YouTube https://www.youtube.com · Julien Miquel YouTube https://www.youtube.com · Julien Miquel",
+ "Do the French say \"je ne sais quoi\"?",
+ "How to Say \"Good Morning\" in French - YouTube YouTube https://www.youtube.com · Big Bong YouTube https://www.youtube.com · Big Bong"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -87,7 +85,8 @@
"cmpt_rank": 3,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -146,7 +145,8 @@
"cmpt_rank": 7,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -343,7 +343,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 21,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[8f98fa9c0bef].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[8f98fa9c0bef].json
index 9d64155..46a4e74 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[8f98fa9c0bef].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[8f98fa9c0bef].json
@@ -210,7 +210,7 @@
{
"cite": null,
"cmpt_rank": 10,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 14,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[9101d12ab778].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[9101d12ab778].json
index 7a22a52..41c0a26 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[9101d12ab778].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[9101d12ab778].json
@@ -17,89 +17,62 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Dad Advice From Bo|YouTube • Jul 6, 2023",
- "title": "",
"url": "https://www.youtube.com/shorts/bInihYJPtEU"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=joBmbh0AGSQ"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/selfreliance/comments/iuliat/how_to_change_a_car_tire/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=QjZ5ohr7sGA"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/everymanshouldknow/comments/1icjyx/emsk_how_to_change_a_flat_tire/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=A_SMlicOjxI"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.lesschwab.com/article/tires/how-to-change-a-tire.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://africa.michelin.com/en/auto/auto-tips-and-advice/tire-maintenance/how-to-change-a-car-tire#:~:text=Here%20are%20some%20tips%20for%20changing%20a,out%20your%20owner's%20manual%20for%20more%20information."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.michelinman.com/auto/auto-tips-and-advice/tire-maintenance/how-to-change-a-car-tire"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.truecar.com/blog/change-tire-10-easy-steps/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.pitstoparabia.com/en/news/how-to-change-car-tyres"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://africa.michelin.com/en/auto/auto-tips-and-advice/tire-maintenance/how-to-change-a-car-tire"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -117,14 +90,12 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- {
- "misc": {},
- "text": "Rated 4.5 out of 5,",
- "title": "",
- "url": ""
- }
- ],
+ "details": {
+ "items": [
+ "Rated 4.5 out of 5,"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -138,14 +109,12 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- {
- "misc": {},
- "text": "Rated 4.7 out of 5,",
- "title": "",
- "url": ""
- }
- ],
+ "details": {
+ "items": [
+ "Rated 4.7 out of 5,"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -215,12 +184,15 @@
{
"cite": null,
"cmpt_rank": 4,
- "details": [
- "How to Change a Tire | Change a flat car tire step by step YouTube https://www.youtube.com · Howdini YouTube https://www.youtube.com · Howdini",
- "What is the 3% rule for tires?",
- "How to Change a Tire - Jack and Jill of All Tires Jack and Jill of All Tires https://jackandjillofalltires.com › how-to-change-a-tire Jack and Jill of All Tires https://jackandjillofalltires.com › how-to-change-a-tire",
- "Which side to unscrew a tire?"
- ],
+ "details": {
+ "items": [
+ "How to Change a Tire | Change a flat car tire step by step YouTube https://www.youtube.com · Howdini YouTube https://www.youtube.com · Howdini",
+ "What is the 3% rule for tires?",
+ "How to Change a Tire - Jack and Jill of All Tires Jack and Jill of All Tires https://jackandjillofalltires.com › how-to-change-a-tire Jack and Jill of All Tires https://jackandjillofalltires.com › how-to-change-a-tire",
+ "Which side to unscrew a tire?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 7,
@@ -236,7 +208,8 @@
"cmpt_rank": 5,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -589,7 +562,8 @@
"cmpt_rank": 10,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -632,7 +606,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 36,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[923a428c1c22].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[923a428c1c22].json
index 15baa3b..944eccc 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[923a428c1c22].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[923a428c1c22].json
@@ -182,12 +182,15 @@
{
"cite": null,
"cmpt_rank": 3,
- "details": [
- "Ways to Watch - NASA NASA https://www.nasa.gov › ways-to-watch NASA https://www.nasa.gov › ways-to-watch",
- "Who owns NASA and how is it funded? 6 interesting facts The Economic Times https://m.economictimes.com › news › science › slideshow The Economic Times https://m.economictimes.com › news › science › slideshow",
- "Why is Artemis 2 delayed?",
- "NASA Astronauts Get Paid HOW MUCH After 9 Months in Space?! YouTube https://www.youtube.com · Benzinga YouTube https://www.youtube.com · Benzinga"
- ],
+ "details": {
+ "items": [
+ "Ways to Watch - NASA NASA https://www.nasa.gov › ways-to-watch NASA https://www.nasa.gov › ways-to-watch",
+ "Who owns NASA and how is it funded? 6 interesting facts The Economic Times https://m.economictimes.com › news › science › slideshow The Economic Times https://m.economictimes.com › news › science › slideshow",
+ "Why is Artemis 2 delayed?",
+ "NASA Astronauts Get Paid HOW MUCH After 9 Months in Space?! YouTube https://www.youtube.com · Benzinga YouTube https://www.youtube.com · Benzinga"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 12,
@@ -232,53 +235,38 @@
"details": {
"heading": "NASA",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://en.wikipedia.org/wiki/NASA"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/topic/NASA"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.space.com/38700-nasa-history.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://m.economictimes.com/news/science/who-owns-nasa-and-how-is-it-funded-6-interesting-facts/nasa-leading-the-way-in-space-exploration/slideshow/112605825.cms"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://theweek.com/in-depth/1023601/what-is-nasa-working-on"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.federalregister.gov/agencies/national-aeronautics-and-space-administration"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.planetary.org/the-national-aeronautics-and-space-administration-nasa"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://nasa.fandom.com/wiki/NASA_(Wikipedia)"
}
]
@@ -492,7 +480,7 @@
{
"cite": null,
"cmpt_rank": 10,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 30,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[97404b7b7c61].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[97404b7b7c61].json
index ab6d74b..b2827b8 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[97404b7b7c61].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[97404b7b7c61].json
@@ -17,65 +17,46 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=uyFvaeSENcCKwbkPqPGKsAk&ved=2ahUKEwjSkYDkv5mSAxWXSDABHUvgByYQgK4QegYIAAgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://spaceplace.nasa.gov/blue-sky/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue#:~:text=The%20sky%20appears%20blue%20because%20of%20the,we%20can%20see%20look%20blue%20or%20violet**"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.montrealsciencecentre.com/blog/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.rmg.co.uk/stories/space-astronomy/why-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -93,12 +74,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "How to explain to a kid why the sky is blue?",
- "Why the sky looks bluer in fall and winter - WSAV-TV WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo... WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo...",
- "Why is the sky Blue? desy.de https://www.desy.de › user › projects › Physics › General desy.de https://www.desy.de › user › projects › Physics › General"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "How to explain to a kid why the sky is blue?",
+ "Why the sky looks bluer in fall and winter - WSAV-TV WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo... WSAV-TV https://www.wsav.com › weather-news › why-the-sky-lo...",
+ "Why is the sky Blue? desy.de https://www.desy.de › user › projects › Physics › General desy.de https://www.desy.de › user › projects › Physics › General"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -352,7 +336,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -383,7 +368,8 @@
"cmpt_rank": 10,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -412,7 +398,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 23,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[984065877aad].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[984065877aad].json
index 98f804b..7a0b29f 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[984065877aad].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[984065877aad].json
@@ -17,179 +17,122 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.franklin.edu/blog/is-college-worth-the-cost"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/findapath/comments/17foihd/is_college_even_worth_it_anymore/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.ppic.org/publication/is-college-worth-it/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://bigfuture.collegeboard.org/plan-for-college/get-started/why-college-is-important#:~:text=College%20does%20matter%20and%20is%20absolutely%20worth,requires%20education%20or%20training%20beyond%20high%20school."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.knox.edu/magazine/spring-2018/features/yes-college-is-worth-it"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nbcnews.com/politics/politics-news/poll-dramatic-shift-americans-no-longer-see-four-year-college-degrees-rcna243672"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/shorts/Ep0750U7M18#:~:text=Trades%2C%20apprenticeships%2C%20and%20businesses%20such%20as%20HVAC%2C,to%20which%20the%20MBA%20graduate%20is%20applying."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://magazine.utah.edu/issues/fall-2024/is-college-still-worth-it/#:~:text=Beyond%20the%20financial%20gains%2C%20her%20research%20uncovers,their%20futures%20compared%20to%20those%20without%20degrees."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=AOVD1NMqYDk"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.usatoday.com/story/news/nation/2025/12/02/college-degree-worth-cost-poll/87567664007/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.chicagofed.org/publications/chicago-fed-insights/2024/policy-brief-is-college-a-worthwhile-investment#:~:text=For%20example%2C%20research%20from%20College%20Board%20indicates,for%20both%20public%20and%20private%20non%2Dprofit%20colleges."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.forbes.com/sites/prestoncooper2/2024/05/17/is-college-worth-it-thats-the-wrong-question/#:~:text=The%20analysis%20demonstrates%20that%20certain%20majors%20are,their%20net%20lifetime%20earnings%20by%20over%20$500%2C000."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wcnc.com/article/money/4-year-college-degree-worth-it-financial-analysis-money-education/275-8e064c56-b11b-4dd7-b30c-d9257dfe87ae#:~:text=Researchers%20say%20investing%20in%20a%20college%20degree,bringing%20home%20the%20most%20money%20on%20average."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.collegeflightpath.com/cfp-blog/understanding-the-roi-of-college-majors-financial-considerations#:~:text=High%2DPaying%20College%20Majors%20&%20ROI%20by%20Major,at%20the%20top%20of%20the%20ROI%20charts."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.insidehighered.com/news/student-success/college-experience/2024/05/29/cost-higher-education-not-worth-it-students#:~:text=Who's%20paying%20for%20it?%20When%20confronted%20with,graduating%2C%20hurting%20the%20overall%20return%20on%20investment."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.ivywise.com/admissions-counseling/academic-advising/#:~:text=Studies%20have%20shown%20that%20a%20lack%20of,year%20of%20college%20can%20cost%20around%20$68%2C000."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/careeradvice/comments/1o1nm6d/is_college_a_scam/#:~:text=If%20you%20go%20into%20with%20no%20career,expect%20it%20to%20be%20a%20golden%20ticket."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://magazine.utah.edu/issues/fall-2024/is-college-still-worth-it/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://bigfuture.collegeboard.org/plan-for-college/get-started/why-college-is-important"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/shorts/Ep0750U7M18"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.chicagofed.org/publications/chicago-fed-insights/2024/policy-brief-is-college-a-worthwhile-investment"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.forbes.com/sites/prestoncooper2/2024/05/17/is-college-worth-it-thats-the-wrong-question/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wcnc.com/article/money/4-year-college-degree-worth-it-financial-analysis-money-education/275-8e064c56-b11b-4dd7-b30c-d9257dfe87ae"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.collegeflightpath.com/cfp-blog/understanding-the-roi-of-college-majors-financial-considerations"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.insidehighered.com/news/student-success/college-experience/2024/05/29/cost-higher-education-not-worth-it-students"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.ivywise.com/admissions-counseling/academic-advising/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/careeradvice/comments/1o1nm6d/is_college_a_scam/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -221,12 +164,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Why isn't Gen Z going to college?",
- "Is $70,000 a good salary out of college?",
- "Can you make $100,000 a year without a degree?",
- "Why College Is Important - BigFuture BigFuture - College Board https://bigfuture.collegeboard.org › get-started › why-col... BigFuture - College Board https://bigfuture.collegeboard.org › get-started › why-col..."
- ],
+ "details": {
+ "items": [
+ "Why isn't Gen Z going to college?",
+ "Is $70,000 a good salary out of college?",
+ "Can you make $100,000 a year without a degree?",
+ "Why College Is Important - BigFuture BigFuture - College Board https://bigfuture.collegeboard.org › get-started › why-col... BigFuture - College Board https://bigfuture.collegeboard.org › get-started › why-col..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -548,7 +494,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 25,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[9a7e39d95bf0].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[9a7e39d95bf0].json
index 78e70b1..34ff7b6 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[9a7e39d95bf0].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[9a7e39d95bf0].json
@@ -17,107 +17,74 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Peekaboo Kidz|YouTube • May 13, 2015",
- "title": "",
"url": "https://www.youtube.com/watch?v=D1Ymc311XS8&t=110"
},
{
- "misc": {},
"text": "Light-Dependent Reactions",
- "title": "",
"url": "https://www.google.com/search?q=Light-Dependent+Reactions&sei=Nn6GacymI9G1vr0P5NCG-QU&ved=2ahUKEwjnooibh8aSAxUCKRAIHcliOF8QgK4QegYIAAgDEAE"
},
{
- "misc": {},
"text": "Light-Independent Reactions",
- "title": "",
"url": "https://www.google.com/search?q=Light-Independent+Reactions&sei=Nn6GacymI9G1vr0P5NCG-QU&ved=2ahUKEwjnooibh8aSAxUCKRAIHcliOF8QgK4QegYIAAgDEAQ"
},
{
- "misc": {},
"text": "Calvin Cycle",
- "title": "",
"url": "https://www.google.com/search?q=Calvin+Cycle&sei=Nn6GacymI9G1vr0P5NCG-QU&ved=2ahUKEwjnooibh8aSAxUCKRAIHcliOF8QgK4QegYIAAgDEAU"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://education.nationalgeographic.org/resource/photosynthesis/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=fTXh7A7Uc2M"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.life.illinois.edu/govindjee/page2.html#:~:text=Photosynthesis%20is%20the%20process%20by%20which%20plants,and%20NADP%20available%20to%20continue%20the%20process"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://en.wikipedia.org/wiki/Photosynthesis"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.sciencefocus.com/nature/how-does-photosynthesis-work"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/science/photosynthesis"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.khanacademy.org/science/ap-biology/cellular-energetics/photosynthesis/v/photosynthesis#:~:text=Photosynthesis%20is%20essential%20for%20life%20on%20Earth.,to%20produce%20carbohydrates.%20Created%20by%20Sal%20Khan."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=CL9A8YhwUps"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.knowatom.com/science-phenomena-videos/6th-grade-science/photosynthesis"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.life.illinois.edu/govindjee/page2.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.khanacademy.org/science/ap-biology/cellular-energetics/photosynthesis/v/photosynthesis"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -149,12 +116,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "The process of photosynthesis - Student Academic Success Monash University https://www.monash.edu › ... › Biology › Photosynthesis Monash University https://www.monash.edu › ... › Biology › Photosynthesis",
- "What are the 4 steps of photosynthesis?",
- "photosynthesis - Kids | Britannica Kids | Homework Help Britannica Kids https://kids.britannica.com › kids › article › photosynthesis Britannica Kids https://kids.britannica.com › kids › article › photosynthesis",
- "Why is photosynthesis a short answer?"
- ],
+ "details": {
+ "items": [
+ "The process of photosynthesis - Student Academic Success Monash University https://www.monash.edu › ... › Biology › Photosynthesis Monash University https://www.monash.edu › ... › Biology › Photosynthesis",
+ "What are the 4 steps of photosynthesis?",
+ "photosynthesis - Kids | Britannica Kids | Homework Help Britannica Kids https://kids.britannica.com › kids › article › photosynthesis Britannica Kids https://kids.britannica.com › kids › article › photosynthesis",
+ "Why is photosynthesis a short answer?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -198,7 +168,8 @@
"cmpt_rank": 5,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -367,7 +338,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 17,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[9ed1baa7715d].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[9ed1baa7715d].json
index e3c3a0e..afb33bc 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[9ed1baa7715d].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[9ed1baa7715d].json
@@ -17,47 +17,34 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Peacock|Subscription|Watch",
- "title": "",
"url": "https://www.peacocktv.com/watch-online/tv/the-office/4902514835143843112"
},
{
- "misc": {},
"text": "Sling TV|Subscription| |(|Requires add-on|)|Watch",
- "title": "",
"url": "https://watch.sling.com/1/franchise/c6efea3b949ad24f9a6d236e71b8b3cd/browse?trackingid=google-feed"
},
{
- "misc": {},
"text": "Amazon Prime Video|Subscription| |(|Requires add-on|)|Watch",
- "title": "",
"url": "https://www.amazon.com/gp/video/detail/amzn1.dv.gti.b2a10098-1876-492c-8857-dd091c87cd21?autoplay=0&ref_=atv_cf_strg_wb"
},
{
- "misc": {},
"text": "YouTube TV|Subscription| |(|Requires add-on|)|Watch",
- "title": "",
"url": "https://tv.youtube.com/browse/UCLqUTxeCVt9p90GLFKUsGFA?utm_source=onebox"
},
{
- "misc": {},
"text": "YouTube|From |$1.99|Watch",
- "title": "",
"url": "https://www.youtube.com/watch?v=bxMy27_rGhs"
},
{
- "misc": {},
"text": "Fandango at Home|$1.99|Watch",
- "title": "",
"url": "https://athome.fandango.com/content/browse/details/The-Office-Finale-Pt-2/446371?cmp=OrganicSearch~Vudu~GoogleWatch"
},
{
- "misc": {},
"text": "Apple TV|$2.99|Watch",
- "title": "",
"url": "https://tv.apple.com/us/episode/the-dundies/umc.cmc.10237h1l57lzymzt2tsuinwhy?action=playSmartEpisode"
}
]
@@ -131,12 +118,15 @@
{
"cite": null,
"cmpt_rank": 5,
- "details": [
- "Watch The Office Streaming | Peacock Peacock https://www.peacocktv.com › stream-tv › the-office Peacock https://www.peacocktv.com › stream-tv › the-office",
- "The Office Removal from Netflix in the US - Facebook Facebook https://www.facebook.com › groups › posts Facebook https://www.facebook.com › groups › posts",
- "Watch The Office | Season 1 Episode 1 | HBO Max HBO Max https://www.hbomax.com › shows › office-2005 › e1-pilot HBO Max https://www.hbomax.com › shows › office-2005 › e1-pilot",
- "You Can Now Stream \"The Office\" For Free on Peacock - The Credits Motion Picture Association https://www.motionpictures.org › 2021/03 › you-can-no... Motion Picture Association https://www.motionpictures.org › 2021/03 › you-can-no..."
- ],
+ "details": {
+ "items": [
+ "Watch The Office Streaming | Peacock Peacock https://www.peacocktv.com › stream-tv › the-office Peacock https://www.peacocktv.com › stream-tv › the-office",
+ "The Office Removal from Netflix in the US - Facebook Facebook https://www.facebook.com › groups › posts Facebook https://www.facebook.com › groups › posts",
+ "Watch The Office | Season 1 Episode 1 | HBO Max HBO Max https://www.hbomax.com › shows › office-2005 › e1-pilot HBO Max https://www.hbomax.com › shows › office-2005 › e1-pilot",
+ "You Can Now Stream \"The Office\" For Free on Peacock - The Credits Motion Picture Association https://www.motionpictures.org › 2021/03 › you-can-no... Motion Picture Association https://www.motionpictures.org › 2021/03 › you-can-no..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 5,
@@ -166,7 +156,8 @@
"cmpt_rank": 7,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -183,7 +174,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -240,7 +232,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 12,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c881e003e2].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c881e003e2].json
index a653360..921508c 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c881e003e2].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c881e003e2].json
@@ -210,12 +210,15 @@
{
"cite": null,
"cmpt_rank": 8,
- "details": [
- "Video shows aftermath of six tornadoes hitting Houston area over ... YouTube https://www.youtube.com · CBS TEXAS YouTube https://www.youtube.com · CBS TEXAS",
- "What is the nicest area of Houston to live in?",
- "Is $100,000 a good salary in Houston?",
- "What part of Houston to stay away from?"
- ],
+ "details": {
+ "items": [
+ "Video shows aftermath of six tornadoes hitting Houston area over ... YouTube https://www.youtube.com · CBS TEXAS YouTube https://www.youtube.com · CBS TEXAS",
+ "What is the nicest area of Houston to live in?",
+ "Is $100,000 a good salary in Houston?",
+ "What part of Houston to stay away from?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 14,
@@ -257,7 +260,7 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 17,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c8fe7fe769].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c8fe7fe769].json
index 4348485..1982dff 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c8fe7fe769].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[a6c8fe7fe769].json
@@ -17,23 +17,18 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Ined - Institut national d'études démographiques",
- "title": "",
"url": "https://www.ined.fr/en/news/press/the-french-population-is-still-increasing-but-for-how-long/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -65,12 +60,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "What US state is France the size of?",
- "Who is the top 1 world population?",
- "UK population exceeds that of France for first time on record, ONS data ... The Guardian https://www.theguardian.com › world › jan › uk-populat... The Guardian https://www.theguardian.com › world › jan › uk-populat...",
- "France Population (2026) - Worldometer Worldometer https://www.worldometers.info › world-population › fr... Worldometer https://www.worldometers.info › world-population › fr..."
- ],
+ "details": {
+ "items": [
+ "What US state is France the size of?",
+ "Who is the top 1 world population?",
+ "UK population exceeds that of France for first time on record, ONS data ... The Guardian https://www.theguardian.com › world › jan › uk-populat... The Guardian https://www.theguardian.com › world › jan › uk-populat...",
+ "France Population (2026) - Worldometer Worldometer https://www.worldometers.info › world-population › fr... Worldometer https://www.worldometers.info › world-population › fr..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -322,7 +320,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 20,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[aa594f199c3d].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[aa594f199c3d].json
index 7411007..5aa7ca4 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[aa594f199c3d].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[aa594f199c3d].json
@@ -17,77 +17,54 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=Yg6FaazIEMy7mtkP5KyvyQQ&ved=2ahUKEwjn0_i1qMOSAxVVlGoFHXFIF7YQgK4QegYIAQgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/story/why-is-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.instagram.com/reel/DIZJjPvOtIN/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.rmg.co.uk/stories/space-astronomy/why-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wsav.com/weather-news/why-the-sky-looks-bluer-in-fall-and-winter/#:~:text=In%20summer%2C%20the%20sun%20sits%20higher%20in,intensifying%20the%20blue%20appearance%20of%20the%20sky."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wsav.com/weather-news/why-the-sky-looks-bluer-in-fall-and-winter/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -105,12 +82,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "What is the true color of our sky?",
- "How to explain to a kid why the sky is blue?",
- "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "What is the true color of our sky?",
+ "How to explain to a kid why the sky is blue?",
+ "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -378,7 +358,8 @@
"cmpt_rank": 7,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -561,7 +542,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 33,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[b15c5131b06c].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[b15c5131b06c].json
index 1d0656e..18a1a7d 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[b15c5131b06c].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[b15c5131b06c].json
@@ -18,41 +18,30 @@
"heading": "NBANBA",
"img_url": null,
"text": "NBANBA",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "► 17:42",
- "title": "",
"url": "https://www.youtube.com/watch?v=EfJoHaqW2u0&feature=onebox"
},
{
- "misc": {},
"text": "► 16:59",
- "title": "",
"url": "https://www.youtube.com/watch?v=BizQrqQOoTE&feature=onebox"
},
{
- "misc": {},
"text": "► 15:27",
- "title": "",
"url": "https://www.youtube.com/watch?v=yi87dpRsySg&feature=onebox"
},
{
- "misc": {},
"text": "► 16:42",
- "title": "",
"url": "https://www.youtube.com/watch?v=lSVYt033aIY&feature=onebox"
},
{
- "misc": {},
"text": "► 16:49",
- "title": "",
"url": "https://www.youtube.com/watch?v=7QLyUH31lYc&feature=onebox"
},
{
- "misc": {},
"text": "► 15:47",
- "title": "",
"url": "https://www.youtube.com/watch?v=GC0NYWhUAiE&feature=onebox"
}
]
@@ -308,14 +297,17 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [
- "ESPN College Basketball",
- "ESPN Major League Baseball",
- "NFL on ESPN",
- "ESPN College Football",
- "NBA on ESPN",
- "NHL on ESPN"
- ],
+ "details": {
+ "items": [
+ "ESPN College Basketball",
+ "ESPN Major League Baseball",
+ "NFL on ESPN",
+ "ESPN College Football",
+ "NBA on ESPN",
+ "NHL on ESPN"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 18,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[b2e1777bf0f2].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[b2e1777bf0f2].json
index b40adb4..00a8ca4 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[b2e1777bf0f2].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[b2e1777bf0f2].json
@@ -17,35 +17,26 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=albert+einstein&udm=2&source=univ&sa=X&ved=2ahUKEwiW1JKtiMaSAxWVoq8BHegXMk0QnN8JegQIERAD"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.theguardian.com/science/alberteinstein"
},
{
- "misc": {},
"text": "Born|Mar 14, 1879|Ulm, Germany",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=albert+einstein+born&stick=H4sIAAAAAAAAAOPgE2LXz9U3yEqu0BLLTrbSL0jNL8hJBVJFxfl5Vkn5RXmLWEUSc5JSi0oUUjPzikuAhAJIGADHZqekOgAAAA&sa=X&ved=2ahUKEwiW1JKtiMaSAxWVoq8BHegXMk0Q18AJegQILRAB"
},
{
- "misc": {},
"text": "Died|Apr 18, 1955|Princeton, NJ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=albert+einstein+died&stick=H4sIAAAAAAAAAOPgE2LXz9U3yEqu0JLPTrbSL0jNL8hJ1U9JTU5NLE5NiS9ILSrOz7NKyUxNWcQqkpiTlFpUopCamVdcAiQUQMIAeqAPLEMAAAA&sa=X&ved=2ahUKEwiW1JKtiMaSAxWVoq8BHegXMk0Q18AJegQINhAB"
},
{
- "misc": {},
"text": "Who was Albert Einstein? | The World Famous Scientist|YouTube|·|Little School|·|Aug 17, 2023",
- "title": "",
"url": "https://www.youtube.com/watch?v=0Zn9M8bjB4s"
}
]
@@ -77,12 +68,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "What was Einstein's IQ?",
- "What are 5 things Albert Einstein is famous for?",
- "What are 10 facts about Albert Einstein?",
- "What happened to Albert Einstein as a child?"
- ],
+ "details": {
+ "items": [
+ "What was Einstein's IQ?",
+ "What are 5 things Albert Einstein is famous for?",
+ "What are 10 facts about Albert Einstein?",
+ "What happened to Albert Einstein as a child?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -908,7 +902,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 61,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[be99c971b8f7].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[be99c971b8f7].json
index 147c46f..c83d25b 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[be99c971b8f7].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[be99c971b8f7].json
@@ -17,77 +17,54 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=DX6GadapJbrIwPAPs6DakAQ&ved=2ahUKEwj3_L-Hh8aSAxWwCBAIHZSuDXkQgK4QegYIAQgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/story/why-is-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.instagram.com/reel/DIZJjPvOtIN/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.rmg.co.uk/stories/space-astronomy/why-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wsav.com/weather-news/why-the-sky-looks-bluer-in-fall-and-winter/#:~:text=In%20summer%2C%20the%20sun%20sits%20higher%20in,intensifying%20the%20blue%20appearance%20of%20the%20sky."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wsav.com/weather-news/why-the-sky-looks-bluer-in-fall-and-winter/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -105,12 +82,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "What is the true color of our sky?",
- "How to explain to a kid why the sky is blue?",
- "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "What is the true color of our sky?",
+ "How to explain to a kid why the sky is blue?",
+ "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -364,7 +344,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -381,7 +362,8 @@
"cmpt_rank": 9,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -424,7 +406,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 23,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[c48f8aa3f6da].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[c48f8aa3f6da].json
index 268cc96..da7e28b 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[c48f8aa3f6da].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[c48f8aa3f6da].json
@@ -28,12 +28,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Art Deco at 100: The Enduring Influence of Design National Building Museum https://nbm.org › Blog Post National Building Museum https://nbm.org › Blog Post",
- "What are the four key features of Art Deco?",
- "Why did America abandon Art Deco?",
- "What is the difference between Art Nouveau and Art Deco?"
- ],
+ "details": {
+ "items": [
+ "Art Deco at 100: The Enduring Influence of Design National Building Museum https://nbm.org › Blog Post National Building Museum https://nbm.org › Blog Post",
+ "What are the four key features of Art Deco?",
+ "Why did America abandon Art Deco?",
+ "What is the difference between Art Nouveau and Art Deco?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -607,14 +610,17 @@
{
"cite": null,
"cmpt_rank": 11,
- "details": [
- "Art Nouveau",
- "Modern art",
- "Art Deco",
- "Gothic Revival architecture",
- "Modernism",
- "Impressionism"
- ],
+ "details": {
+ "items": [
+ "Art Nouveau",
+ "Modern art",
+ "Art Deco",
+ "Gothic Revival architecture",
+ "Modernism",
+ "Impressionism"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 42,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[c9ab650f5bda].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[c9ab650f5bda].json
index deaa92b..e33b319 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[c9ab650f5bda].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[c9ab650f5bda].json
@@ -17,107 +17,74 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Rayleigh scattering",
- "title": "",
"url": "https://www.google.com/search?q=Rayleigh+scattering&sei=f1yFadntLOaOwbkP6tLk-QY&ved=2ahUKEwjCo7718sOSAxXZi7AFHSGSC98QgK4QegYIAQgAEAY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue#:~:text=Gases%20and%20particles%20in%20Earth's%20atmosphere%20scatter,a%20blue%20sky%20most%20of%20the%20time."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.weather.gov/fgz/SkyBlue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.facebook.com/jakedunnekwch/posts/why-is-the-sky-blue-its-an-age-old-question-that-actually-has-a-very-simple-answ/1104044168200325/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://morgridge.org/blue-sky/why-is-the-sky-blue/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/askscience/comments/14566ig/why_is_the_sky_blue_do_i_understand_it_correctly/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html#:~:text=We%20have%20three%20types%20of%20colour%20receptors%2C,visual%20system%20constructs%20the%20colours%20we%20see."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://kids.nationalgeographic.com/books/article/sky"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.uu.edu/dept/physics/scienceguys/2000Oct.cfm"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.britannica.com/story/why-is-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.montrealsciencecentre.com/blog/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://plus.nasa.gov/video/space-place-in-a-snap-why-is-the-sky-blue-2/#:~:text=When%20sunlight%20enters%20Earth's%20atmosphere%2C%20it%20encounters,sky%20appears%20blue%20on%20a%20sunny%20day."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nesdis.noaa.gov/about/k-12-education/atmosphere/why-the-sky-blue"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://plus.nasa.gov/video/space-place-in-a-snap-why-is-the-sky-blue-2/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://math.ucr.edu/home/baez/physics/General/BlueSky/blue_sky.html"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -135,12 +102,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Why is the sky blue short answer?",
- "What is the true color of our sky?",
- "How to explain to a kid why the sky is blue?",
- "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
- ],
+ "details": {
+ "items": [
+ "Why is the sky blue short answer?",
+ "What is the true color of our sky?",
+ "How to explain to a kid why the sky is blue?",
+ "Why is the Sky Blue? - YouTube YouTube https://www.youtube.com · Patristic Nectar YouTube https://www.youtube.com · Patristic Nectar"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -394,7 +364,8 @@
"cmpt_rank": 8,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -411,7 +382,8 @@
"cmpt_rank": 9,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -454,7 +426,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 23,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[cad43c3268a8].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[cad43c3268a8].json
index d3ec8c5..ac8403d 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[cad43c3268a8].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[cad43c3268a8].json
@@ -17,11 +17,10 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Netflix|Subscription|Watch",
- "title": "",
"url": "https://www.netflix.com/watch/80057281?source=35"
}
]
@@ -53,12 +52,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Can I watch Stranger Things on anything other than Netflix?",
- "It's stranger things eve Watch Stranger Things 5: Volume 2, out 26 ... Instagram https://www.instagram.com › reel Instagram https://www.instagram.com › reel",
- "What streaming service is Stranger Things on?",
- "Is Stranger Things season 5 release date on Netflix?"
- ],
+ "details": {
+ "items": [
+ "Can I watch Stranger Things on anything other than Netflix?",
+ "It's stranger things eve Watch Stranger Things 5: Volume 2, out 26 ... Instagram https://www.instagram.com › reel Instagram https://www.instagram.com › reel",
+ "What streaming service is Stranger Things on?",
+ "Is Stranger Things season 5 release date on Netflix?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -172,7 +174,8 @@
"cmpt_rank": 10,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -201,7 +204,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 12,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[ce37f114963e].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[ce37f114963e].json
index 3c5c67b..827da0e 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[ce37f114963e].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[ce37f114963e].json
@@ -18,17 +18,14 @@
"heading": "Choose what you’re giving feedback on",
"img_url": null,
"text": "English - detected|English - detected|Korean|thank you|ˈTHaNGk ˌyo͞o| Learn to pronounce | Learn to pronounce |Learn to pronounce|감사합니다|. . .|.|.|.|gamsahabnida|. . .|.|.|.| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translation|. . .|.|.|.|(feminine)| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translation|. . .|.|.|.|(masculine)| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translations of Thank you!|Thank you!|고맙습니다!|고맙습니다!|감사합니다!|감사합니다!|Recent languages Korean All languages Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari 
Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay 
(Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Detect language Recent languages All languages Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao 
Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada 
Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu| Show more | Show more |Show less|Show less|Can't update the translation right now. Try again later.|Can't update the translation right now. Try again later.|•|Feedback",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": " | |Learn to pronounce| | ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=how+to+pronounce+thank+you&stick=H4sIAAAAAAAAAOMIfcRozS3w8sc9YSmjSWtOXmPU4eINKMrPK81LzkwsyczPExLlYglJLcoV4pXi5uIsyUjMy1aozC-1YlFiSs3jWcQqlZFfrlCSr1AA1JQP1JWqAFcDAGEn4h1dAAAA&pron_lang=en&pron_country=US&sa=X&ved=2ahUKEwihy4G8icaSAxWiQzABHUmoAZQQ9tEEegQIGBAS"
},
{
- "misc": {},
"text": "Open in Google Translate",
- "title": "",
"url": "https://translate.google.com/?um=1&ie=UTF-8&hl=en&client=tw-ob#auto/ko/thank+you"
}
]
@@ -46,12 +43,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "How do you politely say thank you in Korean?",
- "What does kamsahamnida mean?",
- "Do Koreans say Arigato?",
- "How to Say 'Thank You' in Korean – LingQ Blog LingQ https://www.lingq.com › blog › thank-you-in-korean LingQ https://www.lingq.com › blog › thank-you-in-korean"
- ],
+ "details": {
+ "items": [
+ "How do you politely say thank you in Korean?",
+ "What does kamsahamnida mean?",
+ "Do Koreans say Arigato?",
+ "How to Say 'Thank You' in Korean – LingQ Blog LingQ https://www.lingq.com › blog › thank-you-in-korean LingQ https://www.lingq.com › blog › thank-you-in-korean"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -68,59 +68,42 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://migaku.com/blog/korean/thank-you-in-korean"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.clozemaster.com/blog/thank-you-in-korean/#:~:text=There%20are%20four%20levels%20of%20formality%20when,formal**%20%EB%B3%84%EB%A7%90%EC%94%80%EC%9D%84%EC%9A%94%20(byul%2Dmal%2Dsseum%2Deul%2Dyo)%20*%20**Polite**%20%EC%95%84%EB%8B%88%EC%97%90%EC%9A%94%20(a%2Dni%2Dae%2Dyo)"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://itranslate.com/blog/how-to-say-thank-you-in-korean"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.tiktok.com/@dailydoseofkorean/video/7165320589710757122#:~:text=Let's%20master%20the%20art%20of,at:%20Feedback%20and%20help%20%2D%20TikTok&text=how%20about%20come%20ma%20whoa,yo!&text=yes%2C%20please%20keep%20doing%20that,learn%20and%20reamember%20the%20word."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.busuu.com/en/korean/thank-you#:~:text=1.,4."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.kylian.ai/blog/en/gomawo-or-kamsahamnida#:~:text=%22Kamsahamnida%22%20(%EA%B0%90%EC%82%AC%ED%95%A9%EB%8B%88%EB%8B%A4):,expression%20of%20gratitude%20in%20Korean."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/Korean/comments/ur9w90/what_is_the_correct_thank_you_to_use/#:~:text=Just%20say%20%EA%B0%90%EC%82%AC%ED%95%A9%EB%8B%88%EB%8B%A4%20(kamsahamnida,learn%20English%20in%20school%20anyway."
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -488,7 +471,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 28,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[d1855fa9cd1c].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[d1855fa9cd1c].json
index f05dfd4..1b4cc94 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[d1855fa9cd1c].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[d1855fa9cd1c].json
@@ -14,7 +14,7 @@
{
"cite": "https://www.sleepopolis.com › best › mattresses",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 0,
@@ -28,7 +28,7 @@
{
"cite": "https://www.mattressfirm.com",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 1,
@@ -42,32 +42,27 @@
{
"cite": "https://www.wired.com › best › mattress",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "A Clear Winner Emerged",
- "url": "https://www.wired.com/gallery/best-mattresses-1?%7Bignore%7D&g_acctid=102-367-3149&g_adgroupid=%7Badgroupid%7D&g_adid=%7Bcreative%7D&g_adtype=search&g_campaign=US%7C3ZWired%7CPS%7CNB%7CPHR%7CTP&g_campaignid=%7Bcampaignid%7D&g_keyword=%7Bkeyword%7D&g_keywordid=%7Btargetid%7D&g_network=%7Bnetwork%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Updated for 2026",
- "url": "https://www.wired.com/gallery/best-mattresses-1?%7Bignore%7D&g_acctid=102-367-3149&g_adgroupid=%7Badgroupid%7D&g_adid=%7Bcreative%7D&g_adtype=search&g_campaign=US%7C3ZWired%7CPS%7CNB%7CPHR%7CTP&g_campaignid=%7Bcampaignid%7D&g_keyword=%7Bkeyword%7D&g_keywordid=%7Btargetid%7D&g_network=%7Bnetwork%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best For Side Sleepers",
- "url": "https://www.wired.com/gallery/best-mattresses-1/#6744c6d3d637c52ee3e0af45"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best Overall Mattress",
- "url": "https://www.wired.com/gallery/best-mattresses-1/#6744bbaa05df4fffe2abbbe9"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "A Clear Winner Emerged",
+ "url": "https://www.wired.com/gallery/best-mattresses-1?%7Bignore%7D&g_acctid=102-367-3149&g_adgroupid=%7Badgroupid%7D&g_adid=%7Bcreative%7D&g_adtype=search&g_campaign=US%7C3ZWired%7CPS%7CNB%7CPHR%7CTP&g_campaignid=%7Bcampaignid%7D&g_keyword=%7Bkeyword%7D&g_keywordid=%7Btargetid%7D&g_network=%7Bnetwork%7D"
+ },
+ {
+ "title": "Updated for 2026",
+ "url": "https://www.wired.com/gallery/best-mattresses-1?%7Bignore%7D&g_acctid=102-367-3149&g_adgroupid=%7Badgroupid%7D&g_adid=%7Bcreative%7D&g_adtype=search&g_campaign=US%7C3ZWired%7CPS%7CNB%7CPHR%7CTP&g_campaignid=%7Bcampaignid%7D&g_keyword=%7Bkeyword%7D&g_keywordid=%7Btargetid%7D&g_network=%7Bnetwork%7D"
+ },
+ {
+ "title": "Best For Side Sleepers",
+ "url": "https://www.wired.com/gallery/best-mattresses-1/#6744c6d3d637c52ee3e0af45"
+ },
+ {
+ "title": "Best Overall Mattress",
+ "url": "https://www.wired.com/gallery/best-mattresses-1/#6744bbaa05df4fffe2abbbe9"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -81,7 +76,7 @@
{
"cite": "https://www.stearnsandfoster.com",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 3,
@@ -112,137 +107,94 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "r/Mattress on Reddit",
- "title": "",
"url": "https://www.reddit.com/r/Mattress/"
},
{
- "misc": {},
"text": "NapLab",
- "title": "",
"url": "https://naplab.com/"
},
{
- "misc": {},
"text": "The Mattress Underground",
- "title": "",
"url": "https://mattressunderground.com/"
},
{
- "misc": {},
"text": "Sleep Foundation",
- "title": "",
"url": "https://www.sleepfoundation.org/best-mattress"
},
{
- "misc": {},
"text": "HifiGuides Forums",
- "title": "",
"url": "https://forum.hifiguides.com/t/need-some-help-to-choose-a-right-mattress/30570"
},
{
- "misc": {},
"text": "YouTube - Sleep Doctor",
- "title": "",
"url": "https://www.youtube.com/watch?v=aVi59gVcJjI"
},
{
- "misc": {},
"text": "DreamCloud",
- "title": "",
"url": "https://www.youtube.com/watch?v=QChKZrX9ILY"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/Mattress/#:~:text=Well%E2%80%A6%20after%20a%20few%20nights,my%20perspective%2C%20well%20worth%20it."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://mattressunderground.com/#:~:text=Consumer%20Self%2DEducation,Trusted%20Mattress%20Retailers/%20Manufacturers"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://forum.hifiguides.com/t/need-some-help-to-choose-a-right-mattress/30570#:~:text=I%20bought%20a%20mattress%20from,I%20sleep%20well%20on%20it."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/IAmA/comments/ssfzin/iama_professional_mattress_tester_in_the_last_6/#:~:text=I%20would%20go%20with%20the,experience%20a%20constant%20falling%20sensation."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/BuyItForLife/comments/1qcyffa/what_is_the_best_source_for_unbiased_mattress/#:~:text=EDIT:%20Sleepline%20is%20the%20best,reviews%20that%20I'm%20missing?"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.sleepfoundation.org/best-mattress#:~:text=%E2%80%9CThe%20mattress%20feels%20medium%20firm,wife%20indicated%20she%20is%20comfortable.%E2%80%9D"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=QChKZrX9ILY&t=292"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://forum.mattressunderground.com/t/best-or-least-terrible-national-brands/35917#:~:text=I%20watched%20every%20affiliate%20website,have%20body%20profile%20mattress%20testing."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=aVi59gVcJjI&t=189"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=U7rch7jeayg&t=352"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.repcalgaryhomes.ca/blog/canada-mattress-reviews.html#:~:text=Forums%20and%20Subreddits:%20Platforms%20like%20Reddit%20have,where%20users%20share%20genuine%20experiences%20and%20advice."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://naplab.com/best-mattress/#:~:text=Our%20unique%20data%20set%20is%20what%20sets,across%20a%20wide%20range%20of%20performance%20categories."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://naplab.com/mattress-reviews/snooze-flip-mattress-review/#:~:text=Sex%20Test%20The%20best%20mattresses%20for%20sex,lower%20than%20the%20average%20score%20of%209.6."
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -306,41 +258,30 @@
"heading": "Find related products & services",
"img_url": null,
"text": "Find related products & services|About the source|About the source|About the source|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These searches help you find relevant offers from advertisers",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Best |delivery| mattress",
- "title": "",
"url": "http://www.google.com/search?q=Best+delivery+mattress&mrq=1&mrqri=0&mrqei=5n2GaaDoEJH9wPAP_euTuAM&sa=X&ved=2ahUKEwig88v0hsaSAxWRPhAIHf31BDcQyO0OKAB6BAhYEAU"
},
{
- "misc": {},
"text": "Best mattress |2026",
- "title": "",
"url": "http://www.google.com/search?q=Best+mattress+2026&mrq=1&mrqri=1&mrqei=5n2GaaDoEJH9wPAP_euTuAM&sa=X&ved=2ahUKEwig88v0hsaSAxWRPhAIHf31BDcQyO0OKAF6BAhYEAY"
},
{
- "misc": {},
"text": "Best mattress |under $1000",
- "title": "",
"url": "http://www.google.com/search?q=Best+mattress+under+$1000&mrq=1&mrqri=2&mrqei=5n2GaaDoEJH9wPAP_euTuAM&sa=X&ved=2ahUKEwig88v0hsaSAxWRPhAIHf31BDcQyO0OKAJ6BAhYEAc"
},
{
- "misc": {},
"text": "Best mattress |side sleeper",
- "title": "",
"url": "http://www.google.com/search?q=Best+mattress+side+sleeper&mrq=1&mrqri=3&mrqei=5n2GaaDoEJH9wPAP_euTuAM&sa=X&ved=2ahUKEwig88v0hsaSAxWRPhAIHf31BDcQyO0OKAN6BAhYEAg"
},
{
- "misc": {},
"text": "Best |king mattresses",
- "title": "",
"url": "http://www.google.com/search?q=Best+king+mattresses&mrq=1&mrqri=4&mrqei=5n2GaaDoEJH9wPAP_euTuAM&sa=X&ved=2ahUKEwig88v0hsaSAxWRPhAIHf31BDcQyO0OKAR6BAhYEAk"
},
{
- "misc": {},
"text": "Best mattress |queen",
- "title": "",
"url": "http://www.google.com/search?q=Best+mattress+queen&mrq=1&mrqri=5&mrqei=5n2GaaDoEJH9wPAP_euTuAM&sa=X&ved=2ahUKEwig88v0hsaSAxWRPhAIHf31BDcQyO0OKAV6BAhYEAo"
}
]
@@ -554,32 +495,27 @@
{
"cite": "https://www.sleepnumber.com › queen › mattress",
"cmpt_rank": 13,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Presidents Day Sale",
- "url": "https://www.sleepnumber.com/pages/presidents-day-mattress-sale"
- },
- {
- "misc": {},
- "text": "",
- "title": "Queen Size Mattresses",
- "url": "https://www.sleepnumber.com/categories/all-mattresses/queen%7Bignore%7D?gclsrc=aw.ds&%7B_dsmrktparam%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Mattress For Back Pain",
- "url": "https://www.sleepnumber.com/categories/mattresses?test=2"
- },
- {
- "misc": {},
- "text": "",
- "title": "Take Our Bed Quiz",
- "url": "https://www.sleepnumber.com/pages/bed-quiz"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Presidents Day Sale",
+ "url": "https://www.sleepnumber.com/pages/presidents-day-mattress-sale"
+ },
+ {
+ "title": "Queen Size Mattresses",
+ "url": "https://www.sleepnumber.com/categories/all-mattresses/queen%7Bignore%7D?gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ },
+ {
+ "title": "Mattress For Back Pain",
+ "url": "https://www.sleepnumber.com/categories/mattresses?test=2"
+ },
+ {
+ "title": "Take Our Bed Quiz",
+ "url": "https://www.sleepnumber.com/pages/bed-quiz"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 24,
@@ -593,26 +529,23 @@
{
"cite": "https://www.sleepopolis.com › best › mattresses",
"cmpt_rank": 13,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Top Rated Mattresses of 2026",
- "url": "https://sleepopolis.com/pm/best-mattresses-2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Editor's #1 Pick For 2026",
- "url": "https://sleepopolis.com/pm/best-mattresses-2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "King Size Mattresses",
- "url": "https://sleepopolis.com/pm/best-king-mattress-dt/"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Top Rated Mattresses of 2026",
+ "url": "https://sleepopolis.com/pm/best-mattresses-2/"
+ },
+ {
+ "title": "Editor's #1 Pick For 2026",
+ "url": "https://sleepopolis.com/pm/best-mattresses-2/"
+ },
+ {
+ "title": "King Size Mattresses",
+ "url": "https://sleepopolis.com/pm/best-king-mattress-dt/"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 25,
@@ -626,26 +559,23 @@
{
"cite": "https://www.mattressfirm.com",
"cmpt_rank": 13,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Get Our Military Discount",
- "url": "https://www.mattressfirm.com/military-discount.html"
- },
- {
- "misc": {},
- "text": "",
- "title": "$499.99 Adjustable Bed Set",
- "url": "https://www.mattressfirm.com/en-us/stores/%7Bignore%7D?utm_kxconfid=t6c7wbovv&dma=%7Bloc_physical_ms%7D&geo_interest=%7Bloc_interest_ms%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "King Bed for a Queen Price",
- "url": "https://www.mattressfirm.com/mattresses/5637147600.c%7Bignore%7D?utm_kxconfid=t6c7wbovv&dma=%7Bloc_physical_ms%7D&geo_interest=%7Bloc_interest_ms%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Get Our Military Discount",
+ "url": "https://www.mattressfirm.com/military-discount.html"
+ },
+ {
+ "title": "$499.99 Adjustable Bed Set",
+ "url": "https://www.mattressfirm.com/en-us/stores/%7Bignore%7D?utm_kxconfid=t6c7wbovv&dma=%7Bloc_physical_ms%7D&geo_interest=%7Bloc_interest_ms%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ },
+ {
+ "title": "King Bed for a Queen Price",
+ "url": "https://www.mattressfirm.com/mattresses/5637147600.c%7Bignore%7D?utm_kxconfid=t6c7wbovv&dma=%7Bloc_physical_ms%7D&geo_interest=%7Bloc_interest_ms%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 26,
@@ -659,7 +589,7 @@
{
"cite": null,
"cmpt_rank": 14,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 27,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[d1ac0c4abb10].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[d1ac0c4abb10].json
index 666f430..91ab22e 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[d1ac0c4abb10].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[d1ac0c4abb10].json
@@ -17,7 +17,8 @@
"details": {
"heading": "ThinkingSearchingThinking a little longer",
"img_url": null,
- "text": "An AI Overview is not available for this search|An AI Overview is not available for this search|Can't generate an AI overview right now. Try again later.|Can't generate an AI overview right now. Try again later.|Thinking|Thinking|Searching|Searching|Thinking a little longer|Thinking a little longer|My Ad Center"
+ "text": "An AI Overview is not available for this search|An AI Overview is not available for this search|Can't generate an AI overview right now. Try again later.|Can't generate an AI overview right now. Try again later.|Thinking|Thinking|Searching|Searching|Thinking a little longer|Thinking a little longer|My Ad Center",
+ "type": "panel"
},
"error": null,
"section": "main",
@@ -46,12 +47,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "What to Do If You Test Positive for COVID-19 - CDPH CDPH - CA.gov https://www.cdph.ca.gov › CID › DCDC › Pages › What... CDPH - CA.gov https://www.cdph.ca.gov › CID › DCDC › Pages › What...",
- "COVID-19 symptoms and what to do - NHS nhs.uk https://www.nhs.uk › Conditions A to Z › COVID-19 nhs.uk https://www.nhs.uk › Conditions A to Z › COVID-19",
- "Staying Up to Date with COVID-19 Vaccines - CDC CDC https://www.cdc.gov › covid › vaccines › stay-up-to-date CDC https://www.cdc.gov › covid › vaccines › stay-up-to-date",
- "9 Tips to Recover Faster from COVID-19 or Flu - UnityPoint Health UnityPoint Health https://www.unitypoint.org › news-and-articles › 9-tips-t... UnityPoint Health https://www.unitypoint.org › news-and-articles › 9-tips-t..."
- ],
+ "details": {
+ "items": [
+ "What to Do If You Test Positive for COVID-19 - CDPH CDPH - CA.gov https://www.cdph.ca.gov › CID › DCDC › Pages › What... CDPH - CA.gov https://www.cdph.ca.gov › CID › DCDC › Pages › What...",
+ "COVID-19 symptoms and what to do - NHS nhs.uk https://www.nhs.uk › Conditions A to Z › COVID-19 nhs.uk https://www.nhs.uk › Conditions A to Z › COVID-19",
+ "Staying Up to Date with COVID-19 Vaccines - CDC CDC https://www.cdc.gov › covid › vaccines › stay-up-to-date CDC https://www.cdc.gov › covid › vaccines › stay-up-to-date",
+ "9 Tips to Recover Faster from COVID-19 or Flu - UnityPoint Health UnityPoint Health https://www.unitypoint.org › news-and-articles › 9-tips-t... UnityPoint Health https://www.unitypoint.org › news-and-articles › 9-tips-t..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -191,7 +195,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 12,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[d920789249af].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[d920789249af].json
index 98d4d51..44b04f5 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[d920789249af].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[d920789249af].json
@@ -56,12 +56,15 @@
{
"cite": null,
"cmpt_rank": 3,
- "details": [
- "The Legislative Branch Handout - USCIS USCIS https://www.uscis.gov › files › document › lesson-plans USCIS https://www.uscis.gov › files › document › lesson-plans",
- "Which party controls Congress in the USA?",
- "How Congress Works | Congressman Tim Walberg - House.gov Tim Walberg - House.gov https://walberg.house.gov › about › how-congress-works Tim Walberg - House.gov https://walberg.house.gov › about › how-congress-works",
- "House of Representatives and Senate: What's the Difference? - E S S AY E S S AY https://www.visitthecapitol.gov › resources-and-activities E S S AY https://www.visitthecapitol.gov › resources-and-activities"
- ],
+ "details": {
+ "items": [
+ "The Legislative Branch Handout - USCIS USCIS https://www.uscis.gov › files › document › lesson-plans USCIS https://www.uscis.gov › files › document › lesson-plans",
+ "Which party controls Congress in the USA?",
+ "How Congress Works | Congressman Tim Walberg - House.gov Tim Walberg - House.gov https://walberg.house.gov › about › how-congress-works Tim Walberg - House.gov https://walberg.house.gov › about › how-congress-works",
+ "House of Representatives and Senate: What's the Difference? - E S S AY E S S AY https://www.visitthecapitol.gov › resources-and-activities E S S AY https://www.visitthecapitol.gov › resources-and-activities"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 3,
@@ -134,41 +137,30 @@
"details": {
"heading": "Things to know",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "United States Congress - Wikipedia|wikipedia.org|https://en.wikipedia.org| › wiki › United_States_Congress",
- "title": "",
"url": "https://en.wikipedia.org/wiki/United_States_Congress#:~:text=A%20Congress%20covers%20two%20years,end%20on%20January%203%2C%202027."
},
{
- "misc": {},
"text": "What is the current U.S. Congress?",
- "title": "",
"url": "/search?sca_esv=7e23b67cda9c6a45&q=what+is+the+current+us+congress&sa=X&ved=2ahUKEwihu9m1_cWSAxXkSjABHQhbFMsQrooIegQIUhAP"
},
{
- "misc": {},
"text": "The Legislative Branch | whitehouse.gov|archives.gov|https://obamawhitehouse.archives.gov| › legislative-branch",
- "title": "",
"url": "https://obamawhitehouse.archives.gov/1600/legislative-branch#:~:text=Congress%2C%20as%20one%20of%20the,laws%20or%20change%20existing%20laws."
},
{
- "misc": {},
"text": "What does Congress do?",
- "title": "",
"url": "/search?sca_esv=7e23b67cda9c6a45&q=what+does+congress+do&sa=X&ved=2ahUKEwihu9m1_cWSAxXkSjABHQhbFMsQrooIegQIUhAe"
},
{
- "misc": {},
"text": "Membership of the 119th Congress: A Profile|congress.gov|https://www.congress.gov| › crs-product",
- "title": "",
"url": "https://www.congress.gov/crs-product/R48535#:~:text=The%20average%20age%20of%20Members,are%20four%2Dyear%20college%20graduates."
},
{
- "misc": {},
"text": "Congress age",
- "title": "",
"url": "/search?sca_esv=7e23b67cda9c6a45&q=congress+age&sa=X&ved=2ahUKEwihu9m1_cWSAxXkSjABHQhbFMsQrooIegQIUhAt"
}
]
@@ -312,7 +304,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 18,
@@ -327,7 +319,8 @@
"cite": null,
"cmpt_rank": 14,
"details": {
- "subtitle": "Governing body"
+ "subtitle": "Governing body",
+ "type": "panel"
},
"error": null,
"section": "rhs",
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[da9b4fce9ab0].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[da9b4fce9ab0].json
index c3423c8..7927669 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[da9b4fce9ab0].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[da9b4fce9ab0].json
@@ -14,32 +14,27 @@
{
"cite": "https://www.nerdwallet.com",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Top 10 Credit Card Deals",
- "url": "https://www.nerdwallet.com/ur/?nw_campaign_id=151060109504700800&utm_source=goog&utm_medium=cpc&utm_campaign=cc_mktg_paid_012716_branda&utm_term=%7Bkeyword%7D&utm_content=sl&mktg_hline=1126&mktg_place=5"
- },
- {
- "misc": {},
- "text": "",
- "title": "Compare 2026's Top Cards",
- "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAEQDRoCbGU&ae=2&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAAYASADEgILSvD_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_71&sig=AOD64_0k0w5A6hb_fiCS4eILKyJefbJjww&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Side-By-Side Comparisons",
- "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAEQDhoCbGU&ae=2&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAAYASAEEgJlW_D_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_71&sig=AOD64_3hNBVdvPH30lLvMaP8v7K2BvIhHQ&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Cash Back Cards",
- "url": "https://www.nerdwallet.com/ur/?nw_campaign_id=150238348949705700&utm_source=goog&utm_medium=cpc&utm_campaign=cc_mktg_paid_012716_branda&utm_term=%7Bkeyword%7D&utm_content=sl&mktg_hline=44&mktg_place=5"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Top 10 Credit Card Deals",
+ "url": "https://www.nerdwallet.com/ur/?nw_campaign_id=151060109504700800&utm_source=goog&utm_medium=cpc&utm_campaign=cc_mktg_paid_012716_branda&utm_term=%7Bkeyword%7D&utm_content=sl&mktg_hline=1126&mktg_place=5"
+ },
+ {
+ "title": "Compare 2026's Top Cards",
+ "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAEQDRoCbGU&ae=2&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAAYASADEgILSvD_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_71&sig=AOD64_0k0w5A6hb_fiCS4eILKyJefbJjww&adurl=&q="
+ },
+ {
+ "title": "Side-By-Side Comparisons",
+ "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAEQDhoCbGU&ae=2&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAAYASAEEgJlW_D_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_71&sig=AOD64_3hNBVdvPH30lLvMaP8v7K2BvIhHQ&adurl=&q="
+ },
+ {
+ "title": "Cash Back Cards",
+ "url": "https://www.nerdwallet.com/ur/?nw_campaign_id=150238348949705700&utm_source=goog&utm_medium=cpc&utm_campaign=cc_mktg_paid_012716_branda&utm_term=%7Bkeyword%7D&utm_content=sl&mktg_hline=44&mktg_place=5"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 0,
@@ -53,32 +48,27 @@
{
"cite": "https://www.forbes.com",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Top Credit Cards of 2026",
- "url": "https://www.forbes.com/advisor/l/top-credit-cards-v2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best Cash Back Cards",
- "url": "https://www.forbes.com/advisor/l/best-cash-back-credit-cards-v2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best Travel Credit Cards",
- "url": "https://www.forbes.com/advisor/l/best-travel-credit-cards-v2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best 0% APR Cards",
- "url": "https://www.forbes.com/advisor/l/best-0-apr-credit-cards-v2/"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Top Credit Cards of 2026",
+ "url": "https://www.forbes.com/advisor/l/top-credit-cards-v2/"
+ },
+ {
+ "title": "Best Cash Back Cards",
+ "url": "https://www.forbes.com/advisor/l/best-cash-back-credit-cards-v2/"
+ },
+ {
+ "title": "Best Travel Credit Cards",
+ "url": "https://www.forbes.com/advisor/l/best-travel-credit-cards-v2/"
+ },
+ {
+ "title": "Best 0% APR Cards",
+ "url": "https://www.forbes.com/advisor/l/best-0-apr-credit-cards-v2/"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -92,7 +82,7 @@
{
"cite": "https://www.thepointsguy.com",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 2,
@@ -106,32 +96,27 @@
{
"cite": "https://creditcards.chase.com › creditcards › learnmore",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Compare CHASE® Cards",
- "url": "https://creditcards.chase.com/?CELL=62FC&sitelink=1"
- },
- {
- "misc": {},
- "text": "",
- "title": "Chase Credit Cards",
- "url": "https://creditcards.chase.com/?CELL=62FC&sitelink=3"
- },
- {
- "misc": {},
- "text": "",
- "title": "CHASE® Travel Cards",
- "url": "https://creditcards.chase.com/travel-credit-cards?CELL=62FC&sitelink=3"
- },
- {
- "misc": {},
- "text": "",
- "title": "Compare Travel Cards",
- "url": "https://creditcards.chase.com/travel-credit-cards?CELL=62FC&sitelink=2"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Compare CHASE® Cards",
+ "url": "https://creditcards.chase.com/?CELL=62FC&sitelink=1"
+ },
+ {
+ "title": "Chase Credit Cards",
+ "url": "https://creditcards.chase.com/?CELL=62FC&sitelink=3"
+ },
+ {
+ "title": "CHASE® Travel Cards",
+ "url": "https://creditcards.chase.com/travel-credit-cards?CELL=62FC&sitelink=3"
+ },
+ {
+ "title": "Compare Travel Cards",
+ "url": "https://creditcards.chase.com/travel-credit-cards?CELL=62FC&sitelink=2"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 3,
@@ -148,179 +133,122 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Chase Freedom Unlimited®",
- "title": "",
"url": "https://www.google.com/search?q=Chase+Freedom+Unlimited%C2%AE&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEAw"
},
{
- "misc": {},
"text": "Chase Sapphire Preferred® Card",
- "title": "",
"url": "https://www.google.com/search?q=Chase+Sapphire+Preferred%C2%AE+Card&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEA0"
},
{
- "misc": {},
"text": "Capital One Venture X",
- "title": "",
"url": "https://www.google.com/search?q=Capital+One+Venture+X&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEA4"
},
{
- "misc": {},
"text": "Wells Fargo Active Cash® Card",
- "title": "",
"url": "https://www.google.com/search?q=Wells+Fargo+Active+Cash%C2%AE+Card&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEA8"
},
{
- "misc": {},
"text": "Citi® Double Cash Card",
- "title": "",
"url": "https://www.google.com/search?q=Citi%C2%AE+Double+Cash+Card&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEBA"
},
{
- "misc": {},
"text": "Capital One Savor Cash Rewards",
- "title": "",
"url": "https://www.google.com/search?q=Capital+One+Savor+Cash+Rewards&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEBE"
},
{
- "misc": {},
"text": "Amex Platinum Card®",
- "title": "",
"url": "https://www.google.com/search?q=Amex+Platinum+Card%C2%AE&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgAEBI"
},
{
- "misc": {},
"text": "Annual Fee",
- "title": "",
"url": "https://www.google.com/search?q=Annual+Fee&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgEEAE"
},
{
- "misc": {},
"text": "Sign-up Bonus",
- "title": "",
"url": "https://www.google.com/search?q=Sign-up+Bonus&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgEEAM"
},
{
- "misc": {},
"text": "Rewards Structure",
- "title": "",
"url": "https://www.google.com/search?q=Rewards+Structure&sei=vnyGaersKO6PwPAPqr3E8Ac&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQgK4QegYIAAgEEAU"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.bankrate.com/credit-cards/best-credit-cards/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nerdwallet.com/credit-cards/best#:~:text=Browse%20the%20best%20credit%20cards%20of%202026,card%20for%20you%20and%20apply%20in%20seconds."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.forbes.com/advisor/credit-cards/best-credit-cards/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.creditcards.com/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://usa.visa.com/pay-with-visa/find-card/apply-credit-card/good"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://finance.yahoo.com/personal-finance/credit-cards/#:~:text=The%20Chase%20Sapphire%20Preferred%20continues%20to%20be,select%20streaming%20services%2C%20and%20online%20grocery%20purchases."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://wallethub.com/answers/cc/which-credit-card-is-best-for-everything-2140781254/#:~:text=The%20best%20credit%20card%20for%20everything%20is,same%20rewards%20no%20matter%20what%20you%20buy."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://wallethub.com/d/citi-double-cash-card-121c#:~:text=Citi%20Double%20Cash%20Card%20Review%20Citi%20Double,total%20of%202%25%20back%20on%20all%20purchases."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.creditcards.com/cash-back/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://finance.yahoo.com/personal-finance/credit-cards/article/bank-credit-card-174043611.html#:~:text=To%20start%2C%20you%20should%20compare%20welcome%20offers,least%20$500%20within%20the%20first%20three%20months."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cnet.com/personal-finance/credit-cards/eating-out-have-one-of-these-cards-in-your-wallet/#:~:text=After%20comparing%20long%2Dterm%20value%20via%20rewards%20rates,back%20after%20you%20meet%20a%20spending%20threshold."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nerdwallet.com/credit-cards/best"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://wallethub.com/answers/cc/which-credit-card-is-best-for-everything-2140781254/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://finance.yahoo.com/personal-finance/credit-cards/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://wallethub.com/d/citi-double-cash-card-121c"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://finance.yahoo.com/personal-finance/credit-cards/article/bank-credit-card-174043611.html"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cnet.com/personal-finance/credit-cards/eating-out-have-one-of-these-cards-in-your-wallet/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -366,12 +294,15 @@
{
"cite": null,
"cmpt_rank": 4,
- "details": [
- "What is the very best credit card to get?",
- "Which is the best credit card right now?",
- "What credit score do you need for a $400,000 house?",
- "How many Americans have $20,000 in credit card debt?"
- ],
+ "details": {
+ "items": [
+ "What is the very best credit card to get?",
+ "Which is the best credit card right now?",
+ "What credit score do you need for a $400,000 house?",
+ "How many Americans have $20,000 in credit card debt?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 7,
@@ -403,41 +334,30 @@
"heading": "Find related products & services",
"img_url": null,
"text": "Find related products & services|About the source|About the source|About the source|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These searches help you find relevant offers from advertisers",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Best credit cards |2025 for everyday use",
- "title": "",
"url": "http://www.google.com/search?q=Best+credit+cards+2025+for+everyday+use&mrq=1&mrqri=0&mrqei=vnyGacmsPLCSwPAP5cu7oAk&sa=X&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQyO0OKAB6BAh1EAU"
},
{
- "misc": {},
"text": "Best credit cards |for business",
- "title": "",
"url": "http://www.google.com/search?q=Best+credit+cards+for+business&mrq=1&mrqri=1&mrqei=vnyGacmsPLCSwPAP5cu7oAk&sa=X&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQyO0OKAF6BAh1EAY"
},
{
- "misc": {},
"text": "Best credit cards |2025 for business",
- "title": "",
"url": "http://www.google.com/search?q=Best+credit+cards+2025+for+business&mrq=1&mrqri=2&mrqei=vnyGacmsPLCSwPAP5cu7oAk&sa=X&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQyO0OKAJ6BAh1EAc"
},
{
- "misc": {},
"text": "Best credit cards |balance transfer",
- "title": "",
"url": "http://www.google.com/search?q=Best+credit+cards+balance+transfer&mrq=1&mrqri=3&mrqei=vnyGacmsPLCSwPAP5cu7oAk&sa=X&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQyO0OKAN6BAh1EAg"
},
{
- "misc": {},
"text": "Best credit cards |2026",
- "title": "",
"url": "http://www.google.com/search?q=Best+credit+cards+2026&mrq=1&mrqri=4&mrqei=vnyGacmsPLCSwPAP5cu7oAk&sa=X&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQyO0OKAR6BAh1EAk"
},
{
- "misc": {},
"text": "Best credit cards |2026 for travel",
- "title": "",
"url": "http://www.google.com/search?q=Best+credit+cards+2026+for+travel&mrq=1&mrqri=5&mrqei=vnyGacmsPLCSwPAP5cu7oAk&sa=X&ved=2ahUKEwjJg-XnhcaSAxUwCRAIHeXlDpQQyO0OKAV6BAh1EAo"
}
]
@@ -735,32 +655,27 @@
{
"cite": "https://www.forbes.com",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Top Credit Cards of 2026",
- "url": "https://www.forbes.com/advisor/l/top-credit-cards-v2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best Cash Back Cards",
- "url": "https://www.forbes.com/advisor/l/best-cash-back-credit-cards-v2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best Travel Credit Cards",
- "url": "https://www.forbes.com/advisor/l/best-travel-credit-cards-v2/"
- },
- {
- "misc": {},
- "text": "",
- "title": "Best 0% APR Cards",
- "url": "https://www.forbes.com/advisor/l/best-0-apr-credit-cards-v2/"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Top Credit Cards of 2026",
+ "url": "https://www.forbes.com/advisor/l/top-credit-cards-v2/"
+ },
+ {
+ "title": "Best Cash Back Cards",
+ "url": "https://www.forbes.com/advisor/l/best-cash-back-credit-cards-v2/"
+ },
+ {
+ "title": "Best Travel Credit Cards",
+ "url": "https://www.forbes.com/advisor/l/best-travel-credit-cards-v2/"
+ },
+ {
+ "title": "Best 0% APR Cards",
+ "url": "https://www.forbes.com/advisor/l/best-0-apr-credit-cards-v2/"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 30,
@@ -774,26 +689,23 @@
{
"cite": "https://www.comparecredit.com",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Top No Annual Fee Cards",
- "url": "https://www.comparecredit.com/credit-cards/best/no-annual-fee/"
- },
- {
- "misc": {},
- "text": "",
- "title": "0% Intro APR Up to 21 Months",
- "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAIQDxoCbGU&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAMYAiAFEgJyufD_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_32&sig=AOD64_2Hs2i7iT3J06alnGVkKfoc4in9ag&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Up to $300 Welcome Offer",
- "url": "https://www.comparecredit.com/t/sem-cash-back-bonus"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Top No Annual Fee Cards",
+ "url": "https://www.comparecredit.com/credit-cards/best/no-annual-fee/"
+ },
+ {
+ "title": "0% Intro APR Up to 21 Months",
+ "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAIQDxoCbGU&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAMYAiAFEgJyufD_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_32&sig=AOD64_2Hs2i7iT3J06alnGVkKfoc4in9ag&adurl=&q="
+ },
+ {
+ "title": "Up to $300 Welcome Offer",
+ "url": "https://www.comparecredit.com/t/sem-cash-back-bonus"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 31,
@@ -807,32 +719,27 @@
{
"cite": "https://www.thepointsguy.com",
"cmpt_rank": 14,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Guide to 2026's Best Cards",
- "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAIQDBoCbGU&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAMYAyADEgKuX_D_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_32&sig=AOD64_2kX6jY8bcGd4HS661Lyu1GgWkkqw&adurl=&q="
- },
- {
- "misc": {},
- "text": "",
- "title": "Best Business Cards 2026",
- "url": "https://thepointsguy.com/credit-cards/business-pm/?stlnk=best_business"
- },
- {
- "misc": {},
- "text": "",
- "title": "February's Best Offers",
- "url": "https://thepointsguy.com/credit-cards/best-pm/?stlnk=Monthly_Best"
- },
- {
- "misc": {},
- "text": "",
- "title": "Earn 75,000 Points",
- "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAIQDhoCbGU&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAMYAyAEEgL_svD_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_32&sig=AOD64_0d0h7dkInQtAJF20h74mSNIaB6tQ&adurl=&q="
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Guide to 2026's Best Cards",
+ "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAIQDBoCbGU&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAMYAyADEgKuX_D_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_32&sig=AOD64_2kX6jY8bcGd4HS661Lyu1GgWkkqw&adurl=&q="
+ },
+ {
+ "title": "Best Business Cards 2026",
+ "url": "https://thepointsguy.com/credit-cards/business-pm/?stlnk=best_business"
+ },
+ {
+ "title": "February's Best Offers",
+ "url": "https://thepointsguy.com/credit-cards/best-pm/?stlnk=Monthly_Best"
+ },
+ {
+ "title": "Earn 75,000 Points",
+ "url": "/aclk?sa=L&ai=DChsSEwjUnuvnhcaSAxW5IaIDHcXTC9YYACICCAIQDhoCbGU&co=1&ase=2&gclid=EAIaIQobChMI1J7r54XGkgMVuSGiAx3F0wvWEAMYAyAEEgL_svD_BwE&cid=CAASugHkaJbrYikyGRtxE5PVYIXZ_HJa4tzMyLgaxemZkdvThrdMfELuwZbR7dC2hFg_w1Jt2R3jWHwFRE4SN_iAoQOyVpuM1Skrx61h12Q787PdErny8havDOHcCeYixlzvBTm1J_TUWAyzFHY-TBThm2CwQvCetcQpECtJw-KuurC-bb6G7xntE65mUMxJK89GTYHjevd_TheGfyts9SJq75NBs79z_kuoUhvKgjAnwqD9dhUuMTBmRrRuGKw&cce=2&category=acrcp_v1_32&sig=AOD64_0d0h7dkInQtAJF20h74mSNIaB6tQ&adurl=&q="
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 32,
@@ -846,7 +753,7 @@
{
"cite": null,
"cmpt_rank": 15,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 33,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[dc5861b33dda].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[dc5861b33dda].json
index cbc4d92..de28804 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[dc5861b33dda].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[dc5861b33dda].json
@@ -17,107 +17,74 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "ocean acidification",
- "title": "",
"url": "https://www.google.com/search?q=ocean+acidification&sei=M4KGabvlJ7SOosUP9JPZsAg&ved=2ahUKEwjVia6Ci8aSAxVf8DQHHWxHIMYQgK4QegQIARAE"
},
{
- "misc": {},
"text": "Mass Bleaching Events",
- "title": "",
"url": "https://www.google.com/search?q=Mass+Bleaching+Events&sei=M4KGabvlJ7SOosUP9JPZsAg&ved=2ahUKEwjVia6Ci8aSAxVf8DQHHWxHIMYQgK4QegQIAxAB"
},
{
- "misc": {},
"text": "Ocean Acidification",
- "title": "",
"url": "https://www.google.com/search?q=Ocean+Acidification&sei=M4KGabvlJ7SOosUP9JPZsAg&ved=2ahUKEwjVia6Ci8aSAxVf8DQHHWxHIMYQgK4QegQIAxAD"
},
{
- "misc": {},
"text": "National Ocean Service",
- "title": "",
"url": "https://oceanservice.noaa.gov/facts/coralreef-climate.html"
},
{
- "misc": {},
"text": "WWF-UK",
- "title": "",
"url": "https://www.wwf.org.uk/coral-reefs-and-climate-change"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://oceanservice.noaa.gov/facts/coralreef-climate.html#:~:text=A%20warming%20ocean:%20causes%20thermal,Visit%20FishWatch.gov."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.barrierreef.org/the-reef/threats/climate-change#:~:text=When%20corals%20suffer%20heat%20stress,and%20conditions%20return%20to%20normal."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.wwf.org.uk/coral-reefs-and-climate-change#:~:text=Corals%20are%20very%20vulnerable%20to,we're%20destroying%20the%20world."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://annualreport.mbari.org/2018/story/measuring-the-impact-of-climate-change-on-coral-reefs#:~:text=Great%20Barrier%20Reef.-,Sadly%2C%20coral%20reefs%20have%20been%20deteriorating%20worldwide%20due%20to%20human,fatal%20when%20warming%20is%20prolonged."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=k__Lu5UhIoI&t=2"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://coralreefs.org/wp-content/uploads/2020/02/modified-consensus-statement-ICRS-2018.pdf"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=FHhysWpSgtM"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.nationalgeographic.com/environment/article/coral-bleaching-causes-impacts#:~:text=Warming%20oceans%E2%80%94caused%20by%20climate,and%20widespread%20coral%20mortality%20occurred."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=U7VkFwURr6Q"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://impakter.com/fourth-and-furious-mass-coral-bleaching-84-of-reefs-now-under-threat/#:~:text=According%20to%20a%20new%20analysis%20by%20the,83.7%25%20of%20the%20world's%20coral%20reef%20area."
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -317,7 +284,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 14,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[e71a1cb4cd70].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[e71a1cb4cd70].json
index 1b0187d..35f9b84 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[e71a1cb4cd70].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[e71a1cb4cd70].json
@@ -17,35 +17,26 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Netflix|Subscription|Watch",
- "title": "",
"url": "https://www.netflix.com/watch/70143836?source=35"
},
{
- "misc": {},
"text": "YouTube|From |$1.99|Watch",
- "title": "",
"url": "https://www.youtube.com/watch?v=9VYSQkZP4Gc"
},
{
- "misc": {},
"text": "Apple TV|$1.99|Watch",
- "title": "",
"url": "https://tv.apple.com/us/episode/pilot/umc.cmc.6trodclp7sphpy2gelbgwb4fk?action=playSmartEpisode"
},
{
- "misc": {},
"text": "Fandango at Home|$1.99|Watch",
- "title": "",
"url": "https://athome.fandango.com/content/browse/details/Breaking-Bad-Felina/460653?cmp=OrganicSearch~Vudu~GoogleWatch"
},
{
- "misc": {},
"text": "Amazon Prime Video|From |$1.99|Watch",
- "title": "",
"url": "https://www.amazon.com/gp/video/detail/amzn1.dv.gti.f4a9f77c-aa6a-5843-447e-79083319cf7a?autoplay=0&ref_=atv_cf_strg_wb"
}
]
@@ -77,12 +68,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Where can I watch Breaking Bad all seasons?",
- "Where can I watch Breaking Bad other than Netflix?",
- "Breaking Bad Season 3 - Prime Video Prime Video https://www.primevideo.com › detail › Breaking-Bad Prime Video https://www.primevideo.com › detail › Breaking-Bad",
- "Breaking Bad - Wikipedia Wikipedia https://en.wikipedia.org › wiki › Breaking_Bad Wikipedia https://en.wikipedia.org › wiki › Breaking_Bad"
- ],
+ "details": {
+ "items": [
+ "Where can I watch Breaking Bad all seasons?",
+ "Where can I watch Breaking Bad other than Netflix?",
+ "Breaking Bad Season 3 - Prime Video Prime Video https://www.primevideo.com › detail › Breaking-Bad Prime Video https://www.primevideo.com › detail › Breaking-Bad",
+ "Breaking Bad - Wikipedia Wikipedia https://en.wikipedia.org › wiki › Breaking_Bad Wikipedia https://en.wikipedia.org › wiki › Breaking_Bad"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -250,7 +244,7 @@
{
"cite": null,
"cmpt_rank": 12,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 14,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[e828d00dc1b3].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[e828d00dc1b3].json
index 4ae531f..cd32502 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[e828d00dc1b3].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[e828d00dc1b3].json
@@ -14,7 +14,7 @@
{
"cite": "https://www.progressive.com",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 0,
@@ -28,7 +28,7 @@
{
"cite": "https://www.compare.com › compare › quotes",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 1,
@@ -42,32 +42,27 @@
{
"cite": "http://www.geico.com",
"cmpt_rank": 0,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Get A Quote",
- "url": "http://www.geico.com/landingpage/go558/?type=auto30&soa=48133"
- },
- {
- "misc": {},
- "text": "",
- "title": "Free Car Insurance Quotes",
- "url": "http://www.geico.com/landingpage/go558/?type=auto81&soa=39825%7Bignore%7D&ds_kid=%7B_dstrackerid%7D&ds_kids=%7B_dskeywordid%7D&s_kwcid=AL!9960!3!%7Bcreative%7D!%7Bmatchtype%7D!%7Bplacement%7D!%7Bnetwork%7D!%7Bproduct_partition_id%7D!%7Bkeyword%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Free Online Quotes 24/7",
- "url": "http://www.geico.com/landingpage/go558/?type=auto81&soa=39825%7Bignore%7D&ds_kid=%7B_dstrackerid%7D&ds_kids=%7B_dskeywordid%7D&s_kwcid=AL!9960!3!%7Bcreative%7D!%7Bmatchtype%7D!%7Bplacement%7D!%7Bnetwork%7D!%7Bproduct_partition_id%7D!%7Bkeyword%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
- },
- {
- "misc": {},
- "text": "",
- "title": "Multi-Policy Discount",
- "url": "http://www.geico.com/landingpage/go426a/?type=auto51&soa=23536"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Get A Quote",
+ "url": "http://www.geico.com/landingpage/go558/?type=auto30&soa=48133"
+ },
+ {
+ "title": "Free Car Insurance Quotes",
+ "url": "http://www.geico.com/landingpage/go558/?type=auto81&soa=39825%7Bignore%7D&ds_kid=%7B_dstrackerid%7D&ds_kids=%7B_dskeywordid%7D&s_kwcid=AL!9960!3!%7Bcreative%7D!%7Bmatchtype%7D!%7Bplacement%7D!%7Bnetwork%7D!%7Bproduct_partition_id%7D!%7Bkeyword%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ },
+ {
+ "title": "Free Online Quotes 24/7",
+ "url": "http://www.geico.com/landingpage/go558/?type=auto81&soa=39825%7Bignore%7D&ds_kid=%7B_dstrackerid%7D&ds_kids=%7B_dskeywordid%7D&s_kwcid=AL!9960!3!%7Bcreative%7D!%7Bmatchtype%7D!%7Bplacement%7D!%7Bnetwork%7D!%7Bproduct_partition_id%7D!%7Bkeyword%7D&gclsrc=aw.ds&%7B_dsmrktparam%7D"
+ },
+ {
+ "title": "Multi-Policy Discount",
+ "url": "http://www.geico.com/landingpage/go426a/?type=auto51&soa=23536"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
@@ -81,7 +76,7 @@
{
"cite": "https://www.insurify.com",
"cmpt_rank": 0,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 3,
@@ -123,12 +118,15 @@
{
"cite": null,
"cmpt_rank": 3,
- "details": [
- "Who normally has the cheapest car insurance?",
- "What's the best site for car insurance quotes?",
- "Cheap Car Insurance in Colorado for January 2026 - NerdWallet NerdWallet https://www.nerdwallet.com › insurance › auto › cheap-c... NerdWallet https://www.nerdwallet.com › insurance › auto › cheap-c...",
- "What is the cheapest auto insurance in FL?"
- ],
+ "details": {
+ "items": [
+ "Who normally has the cheapest car insurance?",
+ "What's the best site for car insurance quotes?",
+ "Cheap Car Insurance in Colorado for January 2026 - NerdWallet NerdWallet https://www.nerdwallet.com › insurance › auto › cheap-c... NerdWallet https://www.nerdwallet.com › insurance › auto › cheap-c...",
+ "What is the cheapest auto insurance in FL?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 6,
@@ -174,101 +172,70 @@
"heading": "Affordable Auto Insurance",
"img_url": null,
"text": "Sponsored results|Sponsored results|Sponsored results|Sponsored results|Sponsored results|Sponsored results|Affordable Auto Insurance|Affordable Auto Insurance|Progressive|Progressive|Progressive|https://www.progressive.com|https://www.progressive.com|Progressive|Progressive|Progressive|https://www.progressive.com|https://www.progressive.com|My Ad Center|6 Minute Quote|Compare Rates|Compare Auto Insurance Rates|Get an Auto Quote Today|Compare Car Insurance Rates | Enter Your Zip Code|Compare Car Insurance Rates | Enter Your Zip Code|compare.com|compare.com|compare.com|https://www.compare.com › compare › quotes|https://www.compare.com › compare › quotes|compare.com|compare.com|compare.com|https://www.compare.com › compare › quotes|https://www.compare.com › compare › quotes|My Ad Center| | |Rating|2026 Cheap Car Insurance|Car Insurance Calculator|Cheap Car Insurance|Compare Car Insurance|100K+ visits in past month|100K+ visits in past month|100K+ visits in past month|100K+ visits in past month|100K+ visits in past month|Show sponsored results|Hide sponsored results|Find related products & services|About the source|About the source|About the source|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. 
Google gets paid if you click on an ad on the next page.|These search suggestions help you find relevant offers from advertisers. Suggestions are ranked using a number of factors, including the relevance to your search terms, and ads on the next page. Google gets paid if you click on an ad on the next page.|These searches help you find relevant offers from advertisers",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Affordable Auto Insurance|Progressive|https://www.progressive.com",
- "title": "",
"url": "https://www.progressive.com/lp/auto-compare/"
},
{
- "misc": {},
"text": "6 Minute Quote",
- "title": "",
"url": "https://www.progressive.com/lp/auto-easy/?code=8004300041&se=Google&kwd=%7Bkeyword%7D&mt=%7Bmatchtype%7D&psd=%7Bdevice%7D&spn=%7Bnetwork%7D&adid=%7Bcreative%7D&adpos=%7Badposition%7D&phone=80044&srcfrc=true"
},
{
- "misc": {},
"text": "Compare Rates",
- "title": "",
"url": "https://www.progressive.com/lp/auto-compare/?code=8004300042&se=Google&kwd=%7Bkeyword%7D&mt=%7Bmatchtype%7D&psd=%7Bdevice%7D&spn=%7Bnetwork%7D&adid=%7Bcreative%7D&adpos=%7Badposition%7D&phone=80044&srcfrc=true"
},
{
- "misc": {},
"text": "Compare Auto Insurance Rates",
- "title": "",
"url": "https://www.progressive.com/lp/auto-compare/?code=8004309008&se=Google&kwd=%7Bkeyword%7D&mt=%7Bmatchtype%7D&psd=%7Bdevice%7D&spn=%7Bnetwork%7D&adid=%7Bcreative%7D&adpos=%7Badposition%7D&phone=80044&phoneNum=1&srcfrc=true"
},
{
- "misc": {},
"text": "Compare Car Insurance Rates | Enter Your Zip Code|compare.com|https://www.compare.com › compare › quotes",
- "title": "",
"url": "https://www.compare.com/compare-lp-3/"
},
{
- "misc": {},
"text": "Rating",
- "title": "",
"url": "https://www.google.com/shopping/ratings/account/metrics?q=compare.com&c=US&v=19"
},
{
- "misc": {},
"text": "2026 Cheap Car Insurance",
- "title": "",
"url": "/aclk?sa=L&ai=DChsSEwjAuOz6hcaSAxWYT5EFHVutGUcYACICCAIQCBoCbHI&co=1&ase=2&gclid=EAIaIQobChMIwLjs-oXGkgMVmE-RBR1brRlHEAMYAiAEEgLdXvD_BwE&cid=CAASugHkaArEEqd8oJDVR1GE9BHNFDxlkq1u5-GgaYQ49Um21jiXaliI1dgZUuR9aiIdd9xISYOJITv1rYCCx7LCoVTTPQEPhodiOQNvuUMw4ogXFClHEEAhZrWHZMOYtmDFzUAmpje5roG0B_9IVYGKUkgVWYMd5Klr84ENFiPGaCxyh8mZW9-6eFfuz1cKGKVvrr_7mCFrvjDglDxVZdF-aPnxgreL1lH-IyROyjkb6QoBq3WMJ0xsLIjb4G8&cce=2&category=acrcp_v1_32&sig=AOD64_1a83B19JlPSq_8C-tcYLUYMa3n-g&adurl=&q="
},
{
- "misc": {},
"text": "Car Insurance Calculator",
- "title": "",
"url": "https://www.compare.com/rlp/450"
},
{
- "misc": {},
"text": "Cheap Car Insurance",
- "title": "",
"url": "https://www.compare.com/cheap-car-insurance"
},
{
- "misc": {},
"text": "Compare Car Insurance",
- "title": "",
"url": "https://www.compare.com/auto-insurance/compare-insurance-companies"
},
{
- "misc": {},
"text": "Cheapest| car insurance |in California",
- "title": "",
"url": "http://www.google.com/search?q=Cheapest+car+insurance+in+California&mrq=1&mrqri=0&mrqei=5nyGafP4Msun1fIP6OuB-A0&sa=X&ved=2ahUKEwjzg-X6hcaSAxXLU1UIHeh1AN8QyO0OKAB6BAheEAU"
},
{
- "misc": {},
"text": "Car insurance quotes |cheapest",
- "title": "",
"url": "http://www.google.com/search?q=Car+insurance+quotes+cheapest&mrq=1&mrqri=1&mrqei=5nyGafP4Msun1fIP6OuB-A0&sa=X&ved=2ahUKEwjzg-X6hcaSAxXLU1UIHeh1AN8QyO0OKAF6BAheEAY"
},
{
- "misc": {},
"text": "Best| car insurance quotes",
- "title": "",
"url": "http://www.google.com/search?q=Best+car+insurance+quotes&mrq=1&mrqri=2&mrqei=5nyGafP4Msun1fIP6OuB-A0&sa=X&ved=2ahUKEwjzg-X6hcaSAxXLU1UIHeh1AN8QyO0OKAJ6BAheEAc"
},
{
- "misc": {},
"text": "Car insurance quotes |full coverage cheap",
- "title": "",
"url": "http://www.google.com/search?q=Car+insurance+quotes+full+coverage+cheap&mrq=1&mrqri=3&mrqei=5nyGafP4Msun1fIP6OuB-A0&sa=X&ved=2ahUKEwjzg-X6hcaSAxXLU1UIHeh1AN8QyO0OKAN6BAheEAg"
},
{
- "misc": {},
"text": "Compare| car insurance quotes",
- "title": "",
"url": "http://www.google.com/search?q=Compare+car+insurance+quotes&mrq=1&mrqri=4&mrqei=5nyGafP4Msun1fIP6OuB-A0&sa=X&ved=2ahUKEwjzg-X6hcaSAxXLU1UIHeh1AN8QyO0OKAR6BAheEAk"
},
{
- "misc": {},
"text": "Best| car insurance",
- "title": "",
"url": "http://www.google.com/search?q=Best+car+insurance&mrq=1&mrqri=5&mrqei=5nyGafP4Msun1fIP6OuB-A0&sa=X&ved=2ahUKEwjzg-X6hcaSAxXLU1UIHeh1AN8QyO0OKAV6BAheEAo"
}
]
@@ -398,14 +365,17 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [
- "The General",
- "Direct Auto Insurance",
- "Esurance",
- "Allstate",
- "Progressive",
- "State Farm"
- ],
+ "details": {
+ "items": [
+ "The General",
+ "Direct Auto Insurance",
+ "Esurance",
+ "Allstate",
+ "Progressive",
+ "State Farm"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "footer",
"serp_rank": 18,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[eab14aa4ff5d].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[eab14aa4ff5d].json
index 6503348..1ae85dc 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[eab14aa4ff5d].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[eab14aa4ff5d].json
@@ -59,113 +59,78 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Ben Parkes|YouTube • Jan 11, 2026",
- "title": "",
"url": "https://www.youtube.com/watch?v=3IqKhHUZwpg&t=36"
},
{
- "misc": {},
"text": "Running With Jane|YouTube • Sep 24, 2024",
- "title": "",
"url": "https://www.youtube.com/watch?v=TAXvNyavl_c&t=495"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/Marathon_Training/comments/1jsrdzq/any_tips_here_for_a_first_marathon_runner/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=3IqKhHUZwpg"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://42krunning.com/en/10-tips-for-your-first-marathon/#:~:text=Here%20are%20some%20tips%20for%20your%20first,Km32%20*%20Avoid%20sudden%20changes%20in%20pace"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=v7OCbZv2iEM"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/Marathon_Training/comments/18r1rsq/first_time_marathon_runner_what_advice_did_you/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/running/comments/wllvue/your_number_one_tip_for_a_first_time_marathoner/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "http://www.runningandstuff.com/surviving-your-first-marathon"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.reddit.com/r/Marathon_Training/comments/179mylo/what_are_some_things_you_wish_you_knew_before/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://coachdebbieruns.com/your-first-marathon/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=ZdOcD1UmLNs"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=kBo-q4DwqPM"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://highfive.co.uk/blogs/news/top-5-tips-on-marathon-race-day#:~:text=Here%20are%20some%20tips%20for%20surviving%20your,to%20other%20runners%20*%20Enjoy%20the%20music"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://42krunning.com/en/10-tips-for-your-first-marathon/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://highfive.co.uk/blogs/news/top-5-tips-on-marathon-race-day"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -183,12 +148,15 @@
{
"cite": null,
"cmpt_rank": 4,
- "details": [
- "What is the 10-10-10 rule for marathons?",
- "Training For a Marathon: How To Prepare | REI Expert Advice REI https://www.rei.com › learn › training-for-your-first-mar... REI https://www.rei.com › learn › training-for-your-first-mar...",
- "What is the 80% rule in running?",
- "Run your first marathon - TimeOutdoors TimeOutdoors https://www.timeoutdoors.com › blog › running-your-fir... TimeOutdoors https://www.timeoutdoors.com › blog › running-your-fir..."
- ],
+ "details": {
+ "items": [
+ "What is the 10-10-10 rule for marathons?",
+ "Training For a Marathon: How To Prepare | REI Expert Advice REI https://www.rei.com › learn › training-for-your-first-mar... REI https://www.rei.com › learn › training-for-your-first-mar...",
+ "What is the 80% rule in running?",
+ "Run your first marathon - TimeOutdoors TimeOutdoors https://www.timeoutdoors.com › blog › running-your-fir... TimeOutdoors https://www.timeoutdoors.com › blog › running-your-fir..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 4,
@@ -608,38 +576,31 @@
{
"cite": "https://www.runnersworld.com",
"cmpt_rank": 12,
- "details": [
- {
- "misc": {},
- "text": "",
- "title": "Treadmills",
- "url": "https://www.runnersworld.com/gear/a20834859/the-best-treadmills-for-runners/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_a20834859"
- },
- {
- "misc": {},
- "text": "",
- "title": "Advanced Running Watches",
- "url": "https://www.runnersworld.com/gear/a20827055/advanced-gps-watches-for-runners/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_a20827055"
- },
- {
- "misc": {},
- "text": "",
- "title": "Running Socks",
- "url": "https://www.runnersworld.com/gear/a20809978/the-best-socks-for-runners/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_a20809978"
- },
- {
- "misc": {},
- "text": "",
- "title": "Under Desk Treadmills",
- "url": "https://www.runnersworld.com/health-injuries/g36353450/best-under-desk-treadmill/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_g36353450"
- },
- {
- "misc": {},
- "text": "",
- "title": "Steppers",
- "url": "https://www.runnersworld.com/gear/g38971093/best-steppers/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_g38971093"
- }
- ],
+ "details": {
+ "items": [
+ {
+ "title": "Treadmills",
+ "url": "https://www.runnersworld.com/gear/a20834859/the-best-treadmills-for-runners/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_a20834859"
+ },
+ {
+ "title": "Advanced Running Watches",
+ "url": "https://www.runnersworld.com/gear/a20827055/advanced-gps-watches-for-runners/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_a20827055"
+ },
+ {
+ "title": "Running Socks",
+ "url": "https://www.runnersworld.com/gear/a20809978/the-best-socks-for-runners/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_a20809978"
+ },
+ {
+ "title": "Under Desk Treadmills",
+ "url": "https://www.runnersworld.com/health-injuries/g36353450/best-under-desk-treadmill/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_g36353450"
+ },
+ {
+ "title": "Steppers",
+ "url": "https://www.runnersworld.com/gear/g38971093/best-steppers/?utm_source=google&utm_medium=cpc&utm_campaign=mgu_ga_rnw_ext_comm_org_us_g38971093"
+ }
+ ],
+ "type": "menu"
+ },
"error": null,
"section": "main",
"serp_rank": 34,
@@ -653,7 +614,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 35,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[f006c9318116].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[f006c9318116].json
index 0013593..26ed472 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[f006c9318116].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[f006c9318116].json
@@ -17,131 +17,90 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Pfizer-BioNTech",
- "title": "",
"url": "https://www.pfizer.com/"
},
{
- "misc": {},
"text": "Novavax",
- "title": "",
"url": "https://www.novavax.com/"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cdc.gov/covid/hcp/vaccine-considerations/routine-guidance.html#:~:text=At%20a%20glance,of%20COVID%2D19%20risk%20factors."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.cdc.gov/covid/vaccines/stay-up-to-date.html#:~:text=Keep%20in%20mind,individual%2Dbased%20decision%2Dmaking."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.bassett.org/news/are-covid-19-vaccine-ingredients-safe"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.aap.org/en/patient-care/covid-19/covid-19-vaccine-frequently-asked-questions/#:~:text=AAP%20recommends%20a%20single%20dose,AAP%20Policy%20Statement%20as%20follows:"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.pharmacytimes.com/view/fda-recommends-2025-2026-covid-19-vaccines-be-monovalent-target-lp-8-1-strain#:~:text=Based%20on%20VRBPAC%20Vote%2C%20FDA,a%20specific%20sublineage%20to%20target.&text=1%2C2-,LP.,recommendation%20could%20impact%20vaccine%20availability."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC10507236/#:~:text=Rare%20adverse%20effects%20of%20COVID,%2C%20hearing%20changes%2C%20and%20tinnitus.&text=The%20overall%20risk%20of%20anaphylaxis,percentage%20of%20people%20after%20vaccination."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.fda.gov/media/167212/download#:~:text=Pfizer%2DBioNTech%20COVID%2D19%20Vaccine%20contains%20the%20following%20ingredients:,age%20also%20contains%20sodium%20chloride."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.hackensackmeridianhealth.org/HealthU/2021/01/11/a-simple-breakdown-of-the-ingredients-in-the-covid-vaccines/#:~:text=The%20Pfizer%2DBioNTech%20COVID%2D19,if%20exposed%20to%20the%20coronavirus."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.chop.edu/vaccine-education-center/vaccine-safety/vaccine-ingredients/ingredients-by-vaccine/covid-19-vaccines-ingredients#:~:text=Cholesterol%2C%20Potassium%20chloride%2C%20Phosphatidylcholine%2C,sodium%20hydroxide%20or%20hydrochloric%20acid.)"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.publichealth.columbia.edu/news/vaccine-misinformation-outpaces-efforts-counter-it#:~:text=Misinformation%20about%20vaccines%20has%20proliferated,flu%2C%20HPV%2C%20and%20more."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.sciencedirect.com/science/article/pii/S2772628224000098#:~:text=Among%20previously%20vaccinated%20individuals%2C%20belief,ongoing%20vaccine%20boosters%2C%20ceteris%20paribus."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.mskcc.org/coronavirus/for-people-with-cancer#:~:text=Is%20the%20COVID%2D19%20vaccine%20safe?,reported%20after%20COVID%2D19%20vaccination."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=hlOFL---m8A"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.youtube.com/watch?v=hRZ1eTNX65w"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.rivm.nl/en/coronavirus-covid-19/vaccination/side-effects#:~:text=Most%20common%20side%20effects%20of,or%20days%20after%20the%20injection."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.mcgill.ca/oss/article/covid-19-health/dozen-misguided-influencers-spread-most-anti-vaccination-content-social-media#:~:text=You%20may%20have%20heard%20the,exactly%20is%20the%20Disinformation%20Dozen?"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.goodrx.com/conditions/covid-19/ingredients-covid-19-vaccine#:~:text=COVID%20vaccine%20are:-,SM%2D102,is%20the%20hepatitis%20B%20vaccine."
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -173,12 +132,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "Why are people refusing the COVID vaccine?",
- "Stanford Medicine study shows why mRNA COVID-19 vaccine can ... Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid",
- "Scientists Study People Who Never Had COVID - Boston University Boston University https://www.bu.edu › articles › scientists-study-people-w... Boston University https://www.bu.edu › articles › scientists-study-people-w...",
- "What percentage of people have a serious side effect from the COVID vaccine?"
- ],
+ "details": {
+ "items": [
+ "Why are people refusing the COVID vaccine?",
+ "Stanford Medicine study shows why mRNA COVID-19 vaccine can ... Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid Stanford Medicine - Stanford University https://med.stanford.edu › myocarditis-vaccine-covid",
+ "Scientists Study People Who Never Had COVID - Boston University Boston University https://www.bu.edu › articles › scientists-study-people-w... Boston University https://www.bu.edu › articles › scientists-study-people-w...",
+ "What percentage of people have a serious side effect from the COVID vaccine?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 2,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[f6fae1c9a96e].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[f6fae1c9a96e].json
index 7d3bf00..a1766cc 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[f6fae1c9a96e].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[f6fae1c9a96e].json
@@ -17,59 +17,42 @@
"details": {
"heading": "Apple Stock Is Steady as a Rock Amid AI Selloff: How to Play AAPL Stock",
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Disclaimer",
- "title": "",
"url": "/intl/en_us/googlefinance/disclaimer/?sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4Q1AF6BAgrEAQ"
},
{
- "misc": {},
"text": "Yahoo Finance|Apple Stock Is Steady as a Rock Amid AI Selloff: How to Play AAPL Stock|The rally in artificial intelligence (AI) stocks has reversed course as markets get wary about tech companies' spending spree without...|.|5 hours ago",
- "title": "",
"url": "https://finance.yahoo.com/news/apple-stock-steady-rock-amid-192518495.html"
},
{
- "misc": {},
"text": "Microsoft Corp|401.14| USD|1.90%",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=NASDAQ:+MSFT&stick=H4sIAAAAAAAAAONgecRoyi3w8sc9YSmdSWtOXmNU4-IKzsgvd80rySypFJLgYoOy-KR4uLj0c_UNzKty8uIteRax8vg5Brs4Blop-Aa7hQAAJFwipEkAAAA&sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4Qoe4JegQILRAF"
},
{
- "misc": {},
"text": "Samsung Electronics 1 GDS Representing 25 Ord Shs|1,690.00| EUR|4.84%",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=FRA:+SSUN&stick=H4sIAAAAAAAAAONgecRoxi3w8sc9YSndSWtOXmNU5-IKzsgvd80rySypFJLkYoOy-KV4ubj10_UNU8pMzJIqzXkWsXK6BTlaKQQHh_oBAMZYdqRHAAAA&sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4Qoe4JegQILRAG"
},
{
- "misc": {},
"text": "NVIDIA Corp|185.41| USD|7.87%",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=NASDAQ:+NVDA&stick=H4sIAAAAAAAAAONgecRoyi3w8sc9YSmdSWtOXmNU4-IKzsgvd80rySypFJLgYoOy-KR4uLj0c_UNzKtycqpSeBax8vg5Brs4Blop-IW5OAIA9TcWekkAAAA&sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4Qoe4JegQILRAH"
},
{
- "misc": {},
"text": "Amazon.com Inc|210.32| USD|5.55%",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=NASDAQ:+AMZN&stick=H4sIAAAAAAAAAONgecRoyi3w8sc9YSmdSWtOXmNU4-IKzsgvd80rySypFJLgYoOy-KR4uLj0c_UNzKtyLA2yeRax8vg5Brs4BlopOPpG-QEA3pWyeEkAAAA&sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4Qoe4JegQILRAI"
},
{
- "misc": {},
"text": "Add investments",
- "title": "",
"url": "https://www.google.com/finance/portfolio/watchlist?sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4QnIcKegQILRAJ"
},
{
- "misc": {},
"text": " Disclaimer ",
- "title": "",
"url": "/intl/en_us/googlefinance/disclaimer/"
},
{
- "misc": {},
"text": "More about Apple Inc",
- "title": "",
"url": "https://www.google.com/finance/quote/AAPL:NASDAQ?sa=X&ved=2ahUKEwjt6qyFicaSAxX5RzABHWAvPe4Q3ecFKAV6BAgnEAY"
}
]
@@ -143,12 +126,15 @@
{
"cite": null,
"cmpt_rank": 2,
- "details": [
- "How much is $10,000 invested in Apple 20 years ago?",
- "What is the price target for Apple stock?",
- "Will Apple stock reach $700 again?",
- "Apple - 46 Year Stock Price History | AAPL - Macrotrends Macrotrends https://www.macrotrends.net › stocks › charts › stock-pri... Macrotrends https://www.macrotrends.net › stocks › charts › stock-pri..."
- ],
+ "details": {
+ "items": [
+ "How much is $10,000 invested in Apple 20 years ago?",
+ "What is the price target for Apple stock?",
+ "Will Apple stock reach $700 again?",
+ "Apple - 46 Year Stock Price History | AAPL - Macrotrends Macrotrends https://www.macrotrends.net › stocks › charts › stock-pri... Macrotrends https://www.macrotrends.net › stocks › charts › stock-pri..."
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 5,
@@ -176,7 +162,7 @@
{
"cite": null,
"cmpt_rank": 4,
- "details": [],
+ "details": null,
"error": null,
"section": "main",
"serp_rank": 7,
diff --git a/tests/__snapshots__/test_parse_serp/test_parse_serp[faa9c7c889db].json b/tests/__snapshots__/test_parse_serp/test_parse_serp[faa9c7c889db].json
index 459a35a..37db26a 100644
--- a/tests/__snapshots__/test_parse_serp/test_parse_serp[faa9c7c889db].json
+++ b/tests/__snapshots__/test_parse_serp/test_parse_serp[faa9c7c889db].json
@@ -18,17 +18,14 @@
"heading": "Choose what you’re giving feedback on",
"img_url": null,
"text": "English - detected|English - detected|Japanese|hello|həˈlō| Learn to pronounce | Learn to pronounce |Learn to pronounce|こんにちは|. . .|.|.|.|Kon'nichiwa|. . .|.|.|.| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translation|. . .|.|.|.|(feminine)| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translation|. . .|.|.|.|(masculine)| Learn to pronounce | Learn to pronounce |Learn to pronounce|Search for this on Google|Search for this on Google|Search for this on Google|Translations of hello|hello|今日は|今日は|Recent languages Japanese All languages Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen 
Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri 
(Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Detect language Recent languages All languages Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian 
Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu Abkhazian Acehnese Acoli Afar Afrikaans Akan Albanian Alur Amharic Arabic Armenian Assamese Avaric Awadhi Aymara Azerbaijani Balinese Baluchi Bambara Bangla Baoulé Bashkir Basque Batak Karo Batak Simalungun Batak Toba Belarusian Bemba Betawi Bhojpuri Bikol Bosnian Breton Bulgarian Buriat Burmese Cantonese Catalan Cebuano Central Kurdish Chamorro Chechen Chiga Chinese (Simplified) Chinese (Traditional) Chuukese Chuvash Corsican Crimean Tatar Croatian Czech Danish Dari Dinka Divehi Dogri Dombe Dutch Dyula Dzongkha English Esperanto Estonian Ewe Faroese Fijian Filipino Finnish Fon French Friulian Fulani Ga Galician Ganda Georgian German Greek Guarani Gujarati Haitian Creole Hakha Chin Hausa Hawaiian Hebrew Hiligaynon Hindi Hmong Hungarian Hunsrik Iban Icelandic Igbo Iloko Indonesian Irish Italian Jamaican Patois Japanese Javanese Jingpo Kalaallisut Kannada Kanuri Kazakh Khasi Khmer Kinyarwanda 
Kituba Kokborok Komi Kongo Konkani Korean Krio Kurdish Kyrgyz Lao Latgalian Latin Latvian Ligurian Limburgish Lingala Lithuanian Lombard Luo Luxembourgish Macedonian Madurese Maithili Makasar Malagasy Malay Malay (Arabic) Malayalam Maltese Mam Manipuri (Meitei Mayek) Manx Māori Marathi Marshallese Marwari Meadow Mari Minangkabau Mizo Mongolian Morisyen Nahuatl (Eastern Huasteca) Ndau Nepalbhasa (Newari) Nepali NKo Northern Sami Northern Sotho Norwegian Nuer Nyanja Occitan Odia Oromo Ossetic Pampanga Pangasinan Papiamento Pashto Persian Polish Portuguese Portuguese (Portugal) Punjabi Punjabi (Arabic) Q'eqchi' Quechua Romanian Romany Rundi Russian Samoan Sango Sanskrit Santali (Latin) Scottish Gaelic Serbian Seselwa Creole French Shan Shona Sicilian Silesian Sindhi Sinhala Slovak Slovenian Somali South Ndebele Southern Sotho Spanish Sundanese Susu Swahili Swati Swedish Tahitian Tajik Tamazight Tamazight (Tifinagh) Tamil Tatar Telugu Tetum Thai Tibetan Tigrinya Tiv Tok Pisin Tongan Tsonga Tswana Tulu Tumbuka Turkish Turkmen Tuvinian Udmurt Ukrainian Urdu Uyghur Uzbek Venda Venetian Vietnamese Waray Welsh Western Frisian Wolof Xhosa Yakut Yiddish Yoruba Yucatec Maya Zapotec Zulu| Show more | Show more |Show less|Show less|Can't update the translation right now. Try again later.|Can't update the translation right now. Try again later.|•|Feedback",
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": " | |Learn to pronounce| | ",
- "title": "",
"url": "/search?sca_esv=a23961d46812ec7b&q=how+to+pronounce+hello&stick=H4sIAAAAAAAAAOMIfcRozi3w8sc9YSm9SWtOXmPU4OINKMrPK81LzkwsyczPExLkYglJLcoV4pRi52LNSM3JybdiUWJKzeNZxCqWkV-uUJKvUADUkA_UkaoAlgcATlGyjlUAAAA&pron_lang=en&pron_country=US&sa=X&ved=2ahUKEwiXy9uWicaSAxXWQFUIHS1wIHoQ9tEEegQIDBAS"
},
{
- "misc": {},
"text": "Open in Google Translate",
- "title": "",
"url": "https://translate.google.com/?um=1&ie=UTF-8&hl=en&client=tw-ob#auto/ja/hello"
}
]
@@ -46,12 +43,15 @@
{
"cite": null,
"cmpt_rank": 1,
- "details": [
- "Difference between \"Moshi Moshi\" and \"Konichiwa\"? - Facebook Facebook https://www.facebook.com › groups › posts Facebook https://www.facebook.com › groups › posts",
- "How do you greet someone in Japanese?",
- "18 Ways to Say Hello in Japanese Like a Native Speaker Busuu https://www.busuu.com › japanese › greetings Busuu https://www.busuu.com › japanese › greetings",
- "What do we say 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 in Japanese?"
- ],
+ "details": {
+ "items": [
+ "Difference between \"Moshi Moshi\" and \"Konichiwa\"? - Facebook Facebook https://www.facebook.com › groups › posts Facebook https://www.facebook.com › groups › posts",
+ "How do you greet someone in Japanese?",
+ "18 Ways to Say Hello in Japanese Like a Native Speaker Busuu https://www.busuu.com › japanese › greetings Busuu https://www.busuu.com › japanese › greetings",
+ "What do we say 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 in Japanese?"
+ ],
+ "type": "text"
+ },
"error": null,
"section": "main",
"serp_rank": 1,
@@ -68,89 +68,62 @@
"details": {
"heading": null,
"img_url": null,
+ "type": "panel",
"urls": [
{
- "misc": {},
"text": "Konnichiwa",
- "title": "",
"url": "https://www.google.com/search?q=Konnichiwa&sei=RoCGaZDoF5-6wPAP1eqxkQs&mstk=AUtExfDF1DP_Av9YFoezpnP5xd6WyZSbFfGkmasLvLj5fftfGDvIECkiAmiI6Dt7hx3QFtPDBuhFW_JjITWCNmKTEEtooGkt6689X_g-wRzqiC7iISCSAK_9FglJwUulsVyRxkI&csui=3&ved=2ahUKEwjN7vqWicaSAxWBCRAIHftyAV4QgK4QegQIAxAB"
},
{
- "misc": {},
"text": "Ohayō",
- "title": "",
"url": "https://www.google.com/search?q=Ohay%C5%8D&sei=RoCGaZDoF5-6wPAP1eqxkQs&mstk=AUtExfDF1DP_Av9YFoezpnP5xd6WyZSbFfGkmasLvLj5fftfGDvIECkiAmiI6Dt7hx3QFtPDBuhFW_JjITWCNmKTEEtooGkt6689X_g-wRzqiC7iISCSAK_9FglJwUulsVyRxkI&csui=3&ved=2ahUKEwjN7vqWicaSAxWBCRAIHftyAV4QgK4QegQIAxAD"
},
{
- "misc": {},
"text": "Ohayō Gozaimasu",
- "title": "",
"url": "https://www.google.com/search?q=Ohay%C5%8D+Gozaimasu&sei=RoCGaZDoF5-6wPAP1eqxkQs&mstk=AUtExfDF1DP_Av9YFoezpnP5xd6WyZSbFfGkmasLvLj5fftfGDvIECkiAmiI6Dt7hx3QFtPDBuhFW_JjITWCNmKTEEtooGkt6689X_g-wRzqiC7iISCSAK_9FglJwUulsVyRxkI&csui=3&ved=2ahUKEwjN7vqWicaSAxWBCRAIHftyAV4QgK4QegQIAxAF"
},
{
- "misc": {},
"text": "Konbanwa",
- "title": "",
"url": "https://www.google.com/search?q=Konbanwa&sei=RoCGaZDoF5-6wPAP1eqxkQs&mstk=AUtExfDF1DP_Av9YFoezpnP5xd6WyZSbFfGkmasLvLj5fftfGDvIECkiAmiI6Dt7hx3QFtPDBuhFW_JjITWCNmKTEEtooGkt6689X_g-wRzqiC7iISCSAK_9FglJwUulsVyRxkI&csui=3&ved=2ahUKEwjN7vqWicaSAxWBCRAIHftyAV4QgK4QegQIAxAH"
},
{
- "misc": {},
"text": "Moshi Moshi",
- "title": "",
"url": "https://www.google.com/search?q=Moshi+Moshi&sei=RoCGaZDoF5-6wPAP1eqxkQs&mstk=AUtExfDF1DP_Av9YFoezpnP5xd6WyZSbFfGkmasLvLj5fftfGDvIECkiAmiI6Dt7hx3QFtPDBuhFW_JjITWCNmKTEEtooGkt6689X_g-wRzqiC7iISCSAK_9FglJwUulsVyRxkI&csui=3&ved=2ahUKEwjN7vqWicaSAxWBCRAIHftyAV4QgK4QegQIAxAJ"
},
{
- "misc": {},
"text": "Hajimemashite",
- "title": "",
"url": "https://www.google.com/search?q=Hajimemashite&sei=RoCGaZDoF5-6wPAP1eqxkQs&mstk=AUtExfDF1DP_Av9YFoezpnP5xd6WyZSbFfGkmasLvLj5fftfGDvIECkiAmiI6Dt7hx3QFtPDBuhFW_JjITWCNmKTEEtooGkt6689X_g-wRzqiC7iISCSAK_9FglJwUulsVyRxkI&csui=3&ved=2ahUKEwjN7vqWicaSAxWBCRAIHftyAV4QgK4QegQIAxAL"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.busuu.com/en/japanese/greetings#:~:text=About:%20This%20is%20probably%20the,a%20casual%20hi%20in%20Japanese."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.kanpai-japan.com/learn-japanese/hello#:~:text=To%20say%20'hello'%20in%20Japanese%20*%20%E3%81%8A%E3%81%AF%E3%82%88%E3%81%86,ne):%20'it's%20been%20a%20while'%20(more%20polite)"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://www.fluentin3months.com/hello-in-japanese/#:~:text=say%20%E2%80%9Chi.%E2%80%9D-,%E2%80%9CHello%E2%80%9D%20in%20Japanese%20%E2%80%93%20%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF%20(Konnichiwa),use%20it%20in%20the%20afternoon."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://tandem.net/blog/greetings-in-japanese#:~:text=1.,use%20it%20during%20the%20day."
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://forum.flexiclasses.com/t/how-to-say-hello-in-japanese-is-konnichiwa-the-best-choice/2557"
},
{
- "misc": {},
"text": "",
- "title": "",
"url": "https://en.amazingtalker.com/blog/en/japanese/46831/"
},
{
- "misc": {},
"text": "Learn more",
- "title": "",
"url": "https://support.google.com/websearch?p=ai_overviews&hl=en"
},
{
- "misc": {},
"text": "Privacy Policy",
- "title": "",
"url": "https://policies.google.com/privacy?hl=en"
}
]
@@ -170,7 +143,8 @@
"cmpt_rank": 3,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -201,7 +175,8 @@
"cmpt_rank": 5,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -414,7 +389,8 @@
"cmpt_rank": 11,
"details": {
"duration": null,
- "source": null
+ "source": null,
+ "type": "video"
},
"error": null,
"section": "main",
@@ -443,7 +419,7 @@
{
"cite": null,
"cmpt_rank": 13,
- "details": [],
+ "details": null,
"error": null,
"section": "footer",
"serp_rank": 22,
diff --git a/tests/test_feature_extractor.py b/tests/test_feature_extractor.py
new file mode 100644
index 0000000..63e51c4
--- /dev/null
+++ b/tests/test_feature_extractor.py
@@ -0,0 +1,167 @@
+"""Tests for SERP feature extraction"""
+
+from WebSearcher.feature_extractor import FeatureExtractor
+
+
+def make_html(body="", lang="en"):
+ return f'{body}'
+
+
+# Result estimate extraction ---------------------------------------------------
+
+
+def test_extract_result_count_and_time():
+ html = make_html('About 1,234,567 results (0.42 seconds)
')
+ features = FeatureExtractor.extract_features(html)
+ assert features.result_estimate_count == 1234567
+ assert features.result_estimate_time == 0.42
+
+
+def test_extract_result_count_no_comma():
+ html = make_html('About 500 results (0.1 seconds)
')
+ features = FeatureExtractor.extract_features(html)
+ assert features.result_estimate_count == 500
+
+
+def test_extract_result_count_short_time():
+ html = make_html('About 100 results (0.05s)
')
+ features = FeatureExtractor.extract_features(html)
+ assert features.result_estimate_count == 100
+ assert features.result_estimate_time == 0.05
+
+
+def test_extract_no_result_stats():
+ html = make_html("some content
")
+ features = FeatureExtractor.extract_features(html)
+ assert features.result_estimate_count is None
+ assert features.result_estimate_time is None
+
+
+# Language extraction ----------------------------------------------------------
+
+
+def test_extract_language():
+ html = make_html("hello
", lang="en")
+ features = FeatureExtractor.extract_features(html)
+ assert features.language == "en"
+
+
+def test_extract_language_other():
+ html = make_html("hola
", lang="es")
+ features = FeatureExtractor.extract_features(html)
+ assert features.language == "es"
+
+
+def test_extract_no_language():
+ html = "no lang attr"
+ features = FeatureExtractor.extract_features(html)
+ assert features.language is None
+
+
+# No results notice ------------------------------------------------------------
+
+
+def test_no_results_notice_detected():
+ html = make_html("Your search - asdfqwerty - did not match any documents.")
+ features = FeatureExtractor.extract_features(html)
+ assert features.notice_no_results is True
+
+
+def test_no_results_notice_absent():
+ html = make_html("Normal results here
")
+ features = FeatureExtractor.extract_features(html)
+ assert features.notice_no_results is False
+
+
+# String match features --------------------------------------------------------
+
+
+def test_shortened_query_notice():
+ html = make_html("(and any subsequent words) was ignored because we limit queries to 32 words.")
+ features = FeatureExtractor.extract_features(html)
+ assert features.notice_shortened_query is True
+
+
+def test_server_error_notice():
+ html = make_html(
+ "We're sorry but it appears that there has been an internal server error "
+ "while processing your request."
+ )
+ features = FeatureExtractor.extract_features(html)
+ assert features.notice_server_error is True
+
+
+def test_infinity_scroll():
+ html = make_html('More results ')
+ features = FeatureExtractor.extract_features(html)
+ assert features.infinity_scroll is True
+
+
+def test_no_string_matches():
+ html = make_html("clean page
")
+ features = FeatureExtractor.extract_features(html)
+ assert features.notice_shortened_query is False
+ assert features.notice_server_error is False
+ assert features.infinity_scroll is False
+
+
+# CAPTCHA detection ------------------------------------------------------------
+
+
+def test_captcha_detected():
+ html = make_html("Please solve the CAPTCHA to continue
")
+ features = FeatureExtractor.extract_features(html)
+ assert features.captcha is True
+
+
+def test_captcha_absent():
+ html = make_html("Normal page
")
+ features = FeatureExtractor.extract_features(html)
+ assert features.captcha is False
+
+
+# Location overlay -------------------------------------------------------------
+
+
+def test_location_overlay_detected():
+ html = make_html('Use precise location
')
+ features = FeatureExtractor.extract_features(html)
+ assert features.overlay_precise_location is True
+
+
+def test_location_overlay_absent():
+ html = make_html('Something else
')
+ features = FeatureExtractor.extract_features(html)
+ assert features.overlay_precise_location is False
+
+
+def test_location_overlay_no_div():
+ html = make_html("no lb div
")
+ features = FeatureExtractor.extract_features(html)
+ assert features.overlay_precise_location is False
+
+
+# BeautifulSoup input ----------------------------------------------------------
+
+
+def test_extract_from_soup():
+ from bs4 import BeautifulSoup
+
+ soup = BeautifulSoup(
+ make_html('About 99 results (0.3 seconds)
'), "lxml"
+ )
+ features = FeatureExtractor.extract_features(soup)
+ assert features.result_estimate_count == 99
+ assert features.result_estimate_time == 0.3
+
+
+# model_dump -------------------------------------------------------------------
+
+
+def test_features_model_dump():
+ html = make_html('About 10 results (0.1 seconds)
')
+ features = FeatureExtractor.extract_features(html)
+ d = features.model_dump()
+ assert isinstance(d, dict)
+ assert d["result_estimate_count"] == 10
+ assert d["captcha"] is False
diff --git a/tests/test_locations.py b/tests/test_locations.py
new file mode 100644
index 0000000..5bab173
--- /dev/null
+++ b/tests/test_locations.py
@@ -0,0 +1,62 @@
+"""Tests for protobuf encoding and UULE generation"""
+
+from WebSearcher.locations import (
+ convert_canonical_name_to_uule,
+ decode_protobuf_string,
+ encode_protobuf_string,
+)
+
+
+def test_encode_decode_roundtrip_integers():
+ fields = {1: 2, 2: 32}
+ encoded = encode_protobuf_string(fields)
+ decoded = decode_protobuf_string(encoded)
+ assert decoded == fields
+
+
+def test_encode_decode_roundtrip_string():
+ fields = {1: 2, 4: "Boston,Massachusetts,United States"}
+ encoded = encode_protobuf_string(fields)
+ decoded = decode_protobuf_string(encoded)
+ assert decoded == fields
+
+
+def test_encode_decode_roundtrip_mixed():
+ fields = {1: 2, 2: 32, 4: "New York,New York,United States"}
+ encoded = encode_protobuf_string(fields)
+ decoded = decode_protobuf_string(encoded)
+ assert decoded == fields
+
+
+def test_encode_decode_unicode():
+ fields = {1: 0, 4: "Zurich,Zurich,Switzerland"}
+ encoded = encode_protobuf_string(fields)
+ decoded = decode_protobuf_string(encoded)
+ assert decoded == fields
+
+
+def test_uule_starts_with_prefix():
+ result = convert_canonical_name_to_uule("Boston,Massachusetts,United States")
+ assert result.startswith("w+")
+
+
+def test_uule_deterministic():
+ name = "New York,New York,United States"
+ assert convert_canonical_name_to_uule(name) == convert_canonical_name_to_uule(name)
+
+
+def test_uule_different_locations_differ():
+ a = convert_canonical_name_to_uule("Boston,Massachusetts,United States")
+ b = convert_canonical_name_to_uule("New York,New York,United States")
+ assert a != b
+
+
+def test_uule_encodes_expected_fields():
+ """UULE should encode fields {1: 2, 2: 32, 4: canon_name}"""
+ name = "Austin,Texas,United States"
+ uule = convert_canonical_name_to_uule(name)
+ encoded = uule[2:] # strip "w+" prefix
+ decoded = decode_protobuf_string(encoded)
+ assert decoded[1] == 2
+ assert decoded[2] == 32
+ assert decoded[4] == name
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..2005631
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,172 @@
+"""Tests for config models and search params"""
+
+import pytest
+
+from WebSearcher.models.configs import (
+ LogConfig,
+ RequestsConfig,
+ SearchConfig,
+ SearchMethod,
+ SeleniumConfig,
+)
+from WebSearcher.models.data import BaseResult
+from WebSearcher.models.searches import SearchParams
+
+# BaseConfig.create ------------------------------------------------------------
+
+
+def test_base_config_create_from_dict():
+ cfg = LogConfig.create({"console": False, "console_level": "DEBUG"})
+ assert cfg.console is False
+ assert cfg.console_level == "DEBUG"
+
+
+def test_base_config_create_from_instance():
+ original = LogConfig(console=False)
+ result = LogConfig.create(original)
+ assert result is original
+
+
+def test_base_config_create_default():
+ cfg = LogConfig.create()
+ assert cfg.console is True # default value
+
+
+def test_base_config_create_none():
+ cfg = LogConfig.create(None)
+ assert isinstance(cfg, LogConfig)
+
+
+# SearchMethod -----------------------------------------------------------------
+
+
+def test_search_method_from_string():
+ assert SearchMethod.create("selenium") == SearchMethod.SELENIUM
+ assert SearchMethod.create("requests") == SearchMethod.REQUESTS
+
+
+def test_search_method_case_insensitive():
+ assert SearchMethod.create("SELENIUM") == SearchMethod.SELENIUM
+ assert SearchMethod.create("Requests") == SearchMethod.REQUESTS
+
+
+def test_search_method_from_enum():
+ assert SearchMethod.create(SearchMethod.SELENIUM) == SearchMethod.SELENIUM
+
+
+def test_search_method_default():
+ assert SearchMethod.create(None) == SearchMethod.SELENIUM
+
+
+def test_search_method_invalid_string():
+ with pytest.raises(ValueError, match="Invalid search method"):
+ SearchMethod.create("invalid")
+
+
+def test_search_method_invalid_type():
+ with pytest.raises(TypeError, match="Expected string or SearchMethod"):
+ SearchMethod.create(123)
+
+
+# SeleniumConfig ---------------------------------------------------------------
+
+
+def test_selenium_config_defaults():
+ cfg = SeleniumConfig()
+ assert cfg.headless is False
+ assert cfg.version_main is None
+ assert cfg.use_subprocess is False
+
+
+def test_selenium_config_create():
+ cfg = SeleniumConfig.create({"headless": True, "version_main": 130})
+ assert cfg.headless is True
+ assert cfg.version_main == 130
+
+
+# RequestsConfig ---------------------------------------------------------------
+
+
+def test_requests_config_has_default_headers():
+ cfg = RequestsConfig()
+ assert "User-Agent" in cfg.headers
+ assert "Host" in cfg.headers
+
+
+def test_requests_config_sesh():
+ cfg = RequestsConfig()
+ sesh = cfg.sesh
+ assert "User-Agent" in sesh.headers
+
+
+# SearchConfig -----------------------------------------------------------------
+
+
+def test_search_config_defaults():
+ cfg = SearchConfig()
+ assert cfg.method == SearchMethod.SELENIUM
+ assert isinstance(cfg.log, LogConfig)
+ assert isinstance(cfg.selenium, SeleniumConfig)
+ assert isinstance(cfg.requests, RequestsConfig)
+
+
+# SearchParams -----------------------------------------------------------------
+
+
+def test_search_params_url_basic():
+ params = SearchParams.create({"qry": "hello world"})
+ assert "q=hello+world" in params.url
+ assert params.url.startswith("https://www.google.com/search?")
+
+
+def test_search_params_url_with_lang():
+ params = SearchParams.create({"qry": "test", "lang": "en"})
+ assert "hl=en" in params.url
+
+
+def test_search_params_url_with_num_results():
+ params = SearchParams.create({"qry": "test", "num_results": 20})
+ assert "num=20" in params.url
+
+
+def test_search_params_url_with_location():
+ params = SearchParams.create({"qry": "pizza", "loc": "Boston,Massachusetts,United States"})
+ assert "uule=" in params.url
+
+
+def test_search_params_url_omits_none():
+ params = SearchParams.create({"qry": "test", "lang": None, "num_results": None})
+ assert "hl=" not in params.url
+ assert "num=" not in params.url
+
+
+def test_search_params_serp_id_is_hex():
+ params = SearchParams.create({"qry": "test"})
+ assert len(params.serp_id) == 56 # sha224 hex length
+ assert all(c in "0123456789abcdef" for c in params.serp_id)
+
+
+def test_search_params_to_serp_output():
+ params = SearchParams.create({"qry": "test query", "loc": "New York"})
+ output = params.to_serp_output()
+ assert output["qry"] == "test query"
+ assert output["loc"] == "New York"
+ assert "url" in output
+ assert "serp_id" in output
+
+
+def test_search_params_special_chars():
+ params = SearchParams.create({"qry": "cats & dogs"})
+ assert "q=cats+%26+dogs" in params.url
+
+
+# BaseResult -------------------------------------------------------------------
+
+
+def test_base_result_defaults():
+ r = BaseResult()
+ assert r.type == "unclassified"
+ assert r.sub_rank == 0
+ assert r.title is None
+ assert r.url is None
+ assert r.error is None
diff --git a/tests/test_parse_serp.py b/tests/test_parse_serp.py
index bae6e99..49740cd 100644
--- a/tests/test_parse_serp.py
+++ b/tests/test_parse_serp.py
@@ -4,11 +4,10 @@
from pathlib import Path
import orjson
-
import pytest
-import WebSearcher as ws
from syrupy.extensions.json import JSONSnapshotExtension
+import WebSearcher as ws
# ---------------------------------------------------------------------------
# Data loading
@@ -36,6 +35,7 @@ def load_all_serps() -> list[dict]:
# Fixtures
# ---------------------------------------------------------------------------
+
@pytest.fixture
def snapshot_json(snapshot):
return snapshot.use_extension(JSONSnapshotExtension)
@@ -57,10 +57,11 @@ def pytest_generate_tests(metafunc):
# Snapshot tests
# ---------------------------------------------------------------------------
+
@pytest.mark.skipif(not SERPS_PATHS, reason="Demo data not available")
def test_parse_serp(snapshot_json, serp_record):
"""Parse SERP and compare to snapshot"""
- parsed = ws.parse_serp(serp_record["html"], extract_features=True)
+ parsed = ws.parse_serp(serp_record["html"])
assert parsed == snapshot_json
@@ -69,8 +70,18 @@ def test_parse_serp(snapshot_json, serp_record):
# ---------------------------------------------------------------------------
EXPECTED_KEYS = {
- "section", "cmpt_rank", "sub_rank", "type", "sub_type",
- "title", "url", "text", "cite", "details", "error", "serp_rank",
+ "section",
+ "cmpt_rank",
+ "sub_rank",
+ "type",
+ "sub_type",
+ "title",
+ "url",
+ "text",
+ "cite",
+ "details",
+ "error",
+ "serp_rank",
}
@@ -79,8 +90,7 @@ def all_parsed_serps():
"""Parse all SERPs and return list of parsed outputs"""
if not SERPS_PATHS:
pytest.skip("Demo data not available")
- return [ws.parse_serp(record["html"], extract_features=True)
- for record in load_all_serps()]
+ return [ws.parse_serp(record["html"]) for record in load_all_serps()]
@pytest.fixture(scope="module")
@@ -95,7 +105,9 @@ def all_results(all_parsed_serps):
def test_results_have_expected_keys(all_results):
"""Every result dict has exactly the expected keys"""
for r in all_results:
- assert set(r.keys()) == EXPECTED_KEYS, f"cmpt {r.get('cmpt_rank')}: {set(r.keys()) ^ EXPECTED_KEYS}"
+ assert set(r.keys()) == EXPECTED_KEYS, (
+ f"cmpt {r.get('cmpt_rank')}: {set(r.keys()) ^ EXPECTED_KEYS}"
+ )
def test_no_unclassified_results(all_results):
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..e4e81fb
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,338 @@
+"""Tests for utility functions"""
+
+import hashlib
+from pathlib import Path
+
+import pytest
+from bs4 import BeautifulSoup
+
+from WebSearcher import utils
+
+# hash_id ----------------------------------------------------------------------
+
+
+def test_hash_id_produces_sha224():
+ expected = hashlib.sha224(b"hello").hexdigest()
+ assert utils.hash_id("hello") == expected
+
+
+def test_hash_id_different_inputs_differ():
+ assert utils.hash_id("a") != utils.hash_id("b")
+
+
+# get_between_parentheses -----------------------------------------------------
+
+
+def test_get_between_parentheses():
+ assert utils.get_between_parentheses("rating (123)") == "123"
+
+
+def test_get_between_parentheses_first_match():
+ assert utils.get_between_parentheses("(first) (second)") == "first"
+
+
+def test_get_between_parentheses_no_match():
+ with pytest.raises(AttributeError):
+ utils.get_between_parentheses("no parens here")
+
+
+# check_dict_value -------------------------------------------------------------
+
+
+def test_check_dict_value_match():
+ assert utils.check_dict_value({"role": "complementary"}, "role", "complementary") is True
+
+
+def test_check_dict_value_mismatch():
+ assert utils.check_dict_value({"role": "main"}, "role", "complementary") is False
+
+
+def test_check_dict_value_missing_key():
+ assert utils.check_dict_value({"other": "val"}, "role", "complementary") is False
+
+
+def test_check_dict_value_list():
+ assert utils.check_dict_value({"class": ["a", "b"]}, "class", ["a", "b"]) is True
+
+
+# make_soup --------------------------------------------------------------------
+
+
+def test_make_soup_from_string():
+ soup = utils.make_soup("hello
")
+ assert isinstance(soup, BeautifulSoup)
+ assert soup.find("p").text == "hello"
+
+
+def test_make_soup_from_bytes():
+ soup = utils.make_soup(b"bytes
")
+ assert soup.find("p").text == "bytes"
+
+
+def test_make_soup_passthrough():
+ original = BeautifulSoup("test
", "lxml")
+ result = utils.make_soup(original)
+ assert result is original
+
+
+# has_captcha ------------------------------------------------------------------
+
+
+def test_has_captcha_true():
+ soup = utils.make_soup("Please solve this CAPTCHA")
+ assert utils.has_captcha(soup) is True
+
+
+def test_has_captcha_false():
+ soup = utils.make_soup("Normal search results")
+ assert utils.has_captcha(soup) is False
+
+
+# get_div / get_text / get_link ------------------------------------------------
+
+
+def test_get_div_finds_element():
+ soup = utils.make_soup("hi
")
+ div = utils.get_div(soup, "span", {"class": "x"})
+ assert div.text == "hi"
+
+
+def test_get_div_none_soup():
+ assert utils.get_div(None, "div") is None
+
+
+def test_get_div_no_match():
+ soup = utils.make_soup("hello
")
+ assert utils.get_div(soup, "span") is None
+
+
+def test_get_text_basic():
+ soup = utils.make_soup("
Title ")
+ assert utils.get_text(soup, "h3") == "Title"
+
+
+def test_get_text_strip():
+ soup = utils.make_soup(" spaced
")
+ assert utils.get_text(soup, strip=True) == "spaced"
+ assert utils.get_text(soup, strip=False) == " spaced "
+
+
+def test_get_text_none_soup():
+ assert utils.get_text(None, "h3") is None
+
+
+def test_get_text_no_match():
+ soup = utils.make_soup("hello
")
+ assert utils.get_text(soup, "h3") is None
+
+
+def test_get_text_separator():
+ soup = utils.make_soup("")
+ result = utils.get_text(soup, "div", separator="<|>")
+ assert "<|>" in result
+
+
+def test_get_link_basic():
+ soup = utils.make_soup('')
+ assert utils.get_link(soup) == "https://example.com"
+
+
+def test_get_link_none_soup():
+ assert utils.get_link(None) is None
+
+
+def test_get_link_no_anchor():
+ soup = utils.make_soup("no link
")
+ assert utils.get_link(soup) is None
+
+
+def test_get_link_with_attrs():
+ soup = utils.make_soup(
+ ''
+ )
+ assert utils.get_link(soup, {"class": "target"}) == "/found"
+
+
+# get_link_list ----------------------------------------------------------------
+
+
+def test_get_link_list():
+ soup = utils.make_soup('')
+ links = utils.get_link_list(soup)
+ assert links == ["/a", "/b"]
+
+
+def test_get_link_list_none_soup():
+ assert utils.get_link_list(None) is None
+
+
+# get_text_by_selectors --------------------------------------------------------
+
+
+def test_get_text_by_selectors_first_match():
+ soup = utils.make_soup('first second
')
+ selectors = [("span", {"class": "a"}), ("span", {"class": "b"})]
+ assert utils.get_text_by_selectors(soup, selectors) == "first"
+
+
+def test_get_text_by_selectors_fallback():
+ soup = utils.make_soup('fallback
')
+ selectors = [("span", {"class": "a"}), ("span", {"class": "b"})]
+ assert utils.get_text_by_selectors(soup, selectors) == "fallback"
+
+
+def test_get_text_by_selectors_none():
+ assert utils.get_text_by_selectors(None, [("div", {})]) is None
+ soup = utils.make_soup("hi
")
+ assert utils.get_text_by_selectors(soup, None) is None
+
+
+# find_all_divs ----------------------------------------------------------------
+
+
+def test_find_all_divs():
+ soup = utils.make_soup("")
+ divs = utils.find_all_divs(soup, "p")
+ assert len(divs) == 2 # empty one filtered
+
+
+def test_find_all_divs_no_filter():
+ soup = utils.make_soup("")
+ divs = utils.find_all_divs(soup, "p", filter_empty=False)
+ assert len(divs) == 2
+
+
+def test_find_all_divs_none_soup():
+ assert utils.find_all_divs(None, "div") == []
+
+
+# filter_empty_divs ------------------------------------------------------------
+
+
+def test_filter_empty_divs():
+ soup = utils.make_soup("")
+ all_p = soup.find_all("p")
+ filtered = utils.filter_empty_divs(all_p)
+ assert len(filtered) == 2
+
+
+# find_children ----------------------------------------------------------------
+
+
+def test_find_children():
+ soup = utils.make_soup('a b
')
+ children = list(utils.find_children(soup, "div", {"class": "parent"}))
+ assert len(children) >= 2
+
+
+def test_find_children_no_match():
+ soup = utils.make_soup("hello
")
+ children = list(utils.find_children(soup, "span"))
+ assert children == []
+
+
+# URL functions ----------------------------------------------------------------
+
+
+def test_encode_param_value():
+ assert utils.encode_param_value("hello world") == "hello+world"
+ assert utils.encode_param_value("a&b=c") == "a%26b%3Dc"
+
+
+def test_url_unquote():
+ assert utils.url_unquote("hello%20world") == "hello world"
+ assert utils.url_unquote("a%26b") == "a&b"
+
+
+def test_join_url_quote():
+ result = utils.join_url_quote({"q": "hello+world", "hl": "en"})
+ assert result == "q=hello+world&hl=en"
+
+
+def test_get_domain_basic():
+ assert utils.get_domain("https://www.example.com/page") == "example.com"
+
+
+def test_get_domain_with_subdomain():
+ assert utils.get_domain("https://blog.example.com/page") == "blog.example.com"
+
+
+def test_get_domain_none():
+ assert utils.get_domain(None) == ""
+
+
+def test_get_domain_empty():
+ assert utils.get_domain("") == ""
+
+
+# read_lines / write_lines ----------------------------------------------------
+
+
+def test_write_and_read_json(tmp_path):
+ fp = tmp_path / "data.json"
+ data = [{"key": "value", "num": 42}, {"key": "other", "num": 7}]
+ utils.write_lines(data, fp)
+ result = utils.read_lines(fp)
+ assert result == data
+
+
+def test_write_and_read_text(tmp_path):
+ fp = tmp_path / "data.txt"
+ lines = ["line one", "line two", "line three"]
+ utils.write_lines(lines, fp)
+ result = utils.read_lines(fp)
+ assert result == lines
+
+
+def test_write_lines_append(tmp_path):
+ fp = tmp_path / "data.json"
+ utils.write_lines([{"a": 1}], fp)
+ utils.write_lines([{"b": 2}], fp)
+ result = utils.read_lines(fp)
+ assert len(result) == 2
+ assert result[0] == {"a": 1}
+ assert result[1] == {"b": 2}
+
+
+def test_write_lines_overwrite(tmp_path):
+ fp = tmp_path / "data.json"
+ utils.write_lines([{"a": 1}], fp)
+ utils.write_lines([{"b": 2}], fp, overwrite=True)
+ result = utils.read_lines(fp)
+ assert len(result) == 1
+ assert result[0] == {"b": 2}
+
+
+def test_read_lines_accepts_path_object(tmp_path):
+ fp = tmp_path / "test.json"
+ utils.write_lines([{"x": 1}], fp)
+ result = utils.read_lines(Path(fp))
+ assert result == [{"x": 1}]
+
+
+# load_html / load_soup -------------------------------------------------------
+
+
+def test_load_html(tmp_path):
+ fp = tmp_path / "page.html"
+ fp.write_text("hello")
+ content = utils.load_html(fp)
+ assert "hello" in content
+
+
+def test_load_html_brotli(tmp_path):
+ import brotli
+
+ fp = tmp_path / "page.html.br"
+ original = b"compressed"
+ fp.write_bytes(brotli.compress(original))
+ content = utils.load_html(fp, zipped=True)
+ assert b"compressed" in content
+
+
+def test_load_soup(tmp_path):
+ fp = tmp_path / "page.html"
+ fp.write_text("soup test
")
+ soup = utils.load_soup(fp)
+ assert isinstance(soup, BeautifulSoup)
+ assert soup.find("p").text == "soup test"
diff --git a/uv.lock b/uv.lock
index 67bc6bc..2a6cf6b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -218,6 +218,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
]
+[[package]]
+name = "cfgv"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/b5/721b8799b04bf9afe054a3899c6cf4e880fcf8563cc71c15610242490a0c/cfgv-3.5.0.tar.gz", hash = "sha256:d5b1034354820651caa73ede66a6294d6e95c1b00acc5e9b098e917404669132", size = 7334, upload-time = "2025-11-19T20:55:51.612Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
+]
+
[[package]]
name = "charset-normalizer"
version = "3.4.4"
@@ -337,6 +346,124 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl", hash = "sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417", size = 7294, upload-time = "2025-07-25T14:02:02.896Z" },
]
+[[package]]
+name = "coverage"
+version = "7.13.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/24/56/95b7e30fa389756cb56630faa728da46a27b8c6eb46f9d557c68fff12b65/coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91", size = 827239, upload-time = "2026-02-09T12:59:03.86Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/44/d4/7827d9ffa34d5d4d752eec907022aa417120936282fc488306f5da08c292/coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415", size = 219152, upload-time = "2026-02-09T12:56:11.974Z" },
+ { url = "https://files.pythonhosted.org/packages/35/b0/d69df26607c64043292644dbb9dc54b0856fabaa2cbb1eeee3331cc9e280/coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b", size = 219667, upload-time = "2026-02-09T12:56:13.33Z" },
+ { url = "https://files.pythonhosted.org/packages/82/a4/c1523f7c9e47b2271dbf8c2a097e7a1f89ef0d66f5840bb59b7e8814157b/coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a", size = 246425, upload-time = "2026-02-09T12:56:14.552Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f", size = 248229, upload-time = "2026-02-09T12:56:16.31Z" },
+ { url = "https://files.pythonhosted.org/packages/35/98/85aba0aed5126d896162087ef3f0e789a225697245256fc6181b95f47207/coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012", size = 250106, upload-time = "2026-02-09T12:56:18.024Z" },
+ { url = "https://files.pythonhosted.org/packages/96/72/1db59bd67494bc162e3e4cd5fbc7edba2c7026b22f7c8ef1496d58c2b94c/coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def", size = 252021, upload-time = "2026-02-09T12:56:19.272Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/97/72899c59c7066961de6e3daa142d459d47d104956db43e057e034f015c8a/coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256", size = 247114, upload-time = "2026-02-09T12:56:21.051Z" },
+ { url = "https://files.pythonhosted.org/packages/39/1f/f1885573b5970235e908da4389176936c8933e86cb316b9620aab1585fa2/coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda", size = 248143, upload-time = "2026-02-09T12:56:22.585Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/cf/e80390c5b7480b722fa3e994f8202807799b85bc562aa4f1dde209fbb7be/coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92", size = 246152, upload-time = "2026-02-09T12:56:23.748Z" },
+ { url = "https://files.pythonhosted.org/packages/44/bf/f89a8350d85572f95412debb0fb9bb4795b1d5b5232bd652923c759e787b/coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c", size = 249959, upload-time = "2026-02-09T12:56:25.209Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/6e/612a02aece8178c818df273e8d1642190c4875402ca2ba74514394b27aba/coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58", size = 246416, upload-time = "2026-02-09T12:56:26.475Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/98/b5afc39af67c2fa6786b03c3a7091fc300947387ce8914b096db8a73d67a/coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9", size = 247025, upload-time = "2026-02-09T12:56:27.727Z" },
+ { url = "https://files.pythonhosted.org/packages/51/30/2bba8ef0682d5bd210c38fe497e12a06c9f8d663f7025e9f5c2c31ce847d/coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf", size = 221758, upload-time = "2026-02-09T12:56:29.051Z" },
+ { url = "https://files.pythonhosted.org/packages/78/13/331f94934cf6c092b8ea59ff868eb587bc8fe0893f02c55bc6c0183a192e/coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95", size = 222693, upload-time = "2026-02-09T12:56:30.366Z" },
+ { url = "https://files.pythonhosted.org/packages/b4/ad/b59e5b451cf7172b8d1043dc0fa718f23aab379bc1521ee13d4bd9bfa960/coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053", size = 219278, upload-time = "2026-02-09T12:56:31.673Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/17/0cb7ca3de72e5f4ef2ec2fa0089beafbcaaaead1844e8b8a63d35173d77d/coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11", size = 219783, upload-time = "2026-02-09T12:56:33.104Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/63/325d8e5b11e0eaf6d0f6a44fad444ae58820929a9b0de943fa377fe73e85/coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa", size = 250200, upload-time = "2026-02-09T12:56:34.474Z" },
+ { url = "https://files.pythonhosted.org/packages/76/53/c16972708cbb79f2942922571a687c52bd109a7bd51175aeb7558dff2236/coverage-7.13.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7", size = 252114, upload-time = "2026-02-09T12:56:35.749Z" },
+ { url = "https://files.pythonhosted.org/packages/eb/c2/7ab36d8b8cc412bec9ea2d07c83c48930eb4ba649634ba00cb7e4e0f9017/coverage-7.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00", size = 254220, upload-time = "2026-02-09T12:56:37.796Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/4d/cf52c9a3322c89a0e6febdfbc83bb45c0ed3c64ad14081b9503adee702e7/coverage-7.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef", size = 256164, upload-time = "2026-02-09T12:56:39.016Z" },
+ { url = "https://files.pythonhosted.org/packages/78/e9/eb1dd17bd6de8289df3580e967e78294f352a5df8a57ff4671ee5fc3dcd0/coverage-7.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903", size = 250325, upload-time = "2026-02-09T12:56:40.668Z" },
+ { url = "https://files.pythonhosted.org/packages/71/07/8c1542aa873728f72267c07278c5cc0ec91356daf974df21335ccdb46368/coverage-7.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f", size = 251913, upload-time = "2026-02-09T12:56:41.97Z" },
+ { url = "https://files.pythonhosted.org/packages/74/d7/c62e2c5e4483a748e27868e4c32ad3daa9bdddbba58e1bc7a15e252baa74/coverage-7.13.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299", size = 249974, upload-time = "2026-02-09T12:56:43.323Z" },
+ { url = "https://files.pythonhosted.org/packages/98/9f/4c5c015a6e98ced54efd0f5cf8d31b88e5504ecb6857585fc0161bb1e600/coverage-7.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505", size = 253741, upload-time = "2026-02-09T12:56:45.155Z" },
+ { url = "https://files.pythonhosted.org/packages/bd/59/0f4eef89b9f0fcd9633b5d350016f54126ab49426a70ff4c4e87446cabdc/coverage-7.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6", size = 249695, upload-time = "2026-02-09T12:56:46.636Z" },
+ { url = "https://files.pythonhosted.org/packages/b5/2c/b7476f938deb07166f3eb281a385c262675d688ff4659ad56c6c6b8e2e70/coverage-7.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9", size = 250599, upload-time = "2026-02-09T12:56:48.13Z" },
+ { url = "https://files.pythonhosted.org/packages/b8/34/c3420709d9846ee3785b9f2831b4d94f276f38884032dca1457fa83f7476/coverage-7.13.4-cp311-cp311-win32.whl", hash = "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9", size = 221780, upload-time = "2026-02-09T12:56:50.479Z" },
+ { url = "https://files.pythonhosted.org/packages/61/08/3d9c8613079d2b11c185b865de9a4c1a68850cfda2b357fae365cf609f29/coverage-7.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f", size = 222715, upload-time = "2026-02-09T12:56:51.815Z" },
+ { url = "https://files.pythonhosted.org/packages/18/1a/54c3c80b2f056164cc0a6cdcb040733760c7c4be9d780fe655f356f433e4/coverage-7.13.4-cp311-cp311-win_arm64.whl", hash = "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f", size = 221385, upload-time = "2026-02-09T12:56:53.194Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/81/4ce2fdd909c5a0ed1f6dedb88aa57ab79b6d1fbd9b588c1ac7ef45659566/coverage-7.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459", size = 219449, upload-time = "2026-02-09T12:56:54.889Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/96/5238b1efc5922ddbdc9b0db9243152c09777804fb7c02ad1741eb18a11c0/coverage-7.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3", size = 219810, upload-time = "2026-02-09T12:56:56.33Z" },
+ { url = "https://files.pythonhosted.org/packages/78/72/2f372b726d433c9c35e56377cf1d513b4c16fe51841060d826b95caacec1/coverage-7.13.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634", size = 251308, upload-time = "2026-02-09T12:56:57.858Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/a0/2ea570925524ef4e00bb6c82649f5682a77fac5ab910a65c9284de422600/coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3", size = 254052, upload-time = "2026-02-09T12:56:59.754Z" },
+ { url = "https://files.pythonhosted.org/packages/e8/ac/45dc2e19a1939098d783c846e130b8f862fbb50d09e0af663988f2f21973/coverage-7.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa", size = 255165, upload-time = "2026-02-09T12:57:01.287Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/4d/26d236ff35abc3b5e63540d3386e4c3b192168c1d96da5cb2f43c640970f/coverage-7.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3", size = 257432, upload-time = "2026-02-09T12:57:02.637Z" },
+ { url = "https://files.pythonhosted.org/packages/ec/55/14a966c757d1348b2e19caf699415a2a4c4f7feaa4bbc6326a51f5c7dd1b/coverage-7.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a", size = 251716, upload-time = "2026-02-09T12:57:04.056Z" },
+ { url = "https://files.pythonhosted.org/packages/77/33/50116647905837c66d28b2af1321b845d5f5d19be9655cb84d4a0ea806b4/coverage-7.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7", size = 253089, upload-time = "2026-02-09T12:57:05.503Z" },
+ { url = "https://files.pythonhosted.org/packages/c2/b4/8efb11a46e3665d92635a56e4f2d4529de6d33f2cb38afd47d779d15fc99/coverage-7.13.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc", size = 251232, upload-time = "2026-02-09T12:57:06.879Z" },
+ { url = "https://files.pythonhosted.org/packages/51/24/8cd73dd399b812cc76bb0ac260e671c4163093441847ffe058ac9fda1e32/coverage-7.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47", size = 255299, upload-time = "2026-02-09T12:57:08.245Z" },
+ { url = "https://files.pythonhosted.org/packages/03/94/0a4b12f1d0e029ce1ccc1c800944a9984cbe7d678e470bb6d3c6bc38a0da/coverage-7.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985", size = 250796, upload-time = "2026-02-09T12:57:10.142Z" },
+ { url = "https://files.pythonhosted.org/packages/73/44/6002fbf88f6698ca034360ce474c406be6d5a985b3fdb3401128031eef6b/coverage-7.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0", size = 252673, upload-time = "2026-02-09T12:57:12.197Z" },
+ { url = "https://files.pythonhosted.org/packages/de/c6/a0279f7c00e786be75a749a5674e6fa267bcbd8209cd10c9a450c655dfa7/coverage-7.13.4-cp312-cp312-win32.whl", hash = "sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246", size = 221990, upload-time = "2026-02-09T12:57:14.085Z" },
+ { url = "https://files.pythonhosted.org/packages/77/4e/c0a25a425fcf5557d9abd18419c95b63922e897bc86c1f327f155ef234a9/coverage-7.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126", size = 222800, upload-time = "2026-02-09T12:57:15.944Z" },
+ { url = "https://files.pythonhosted.org/packages/47/ac/92da44ad9a6f4e3a7debd178949d6f3769bedca33830ce9b1dcdab589a37/coverage-7.13.4-cp312-cp312-win_arm64.whl", hash = "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d", size = 221415, upload-time = "2026-02-09T12:57:17.497Z" },
+ { url = "https://files.pythonhosted.org/packages/db/23/aad45061a31677d68e47499197a131eea55da4875d16c1f42021ab963503/coverage-7.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9", size = 219474, upload-time = "2026-02-09T12:57:19.332Z" },
+ { url = "https://files.pythonhosted.org/packages/a5/70/9b8b67a0945f3dfec1fd896c5cefb7c19d5a3a6d74630b99a895170999ae/coverage-7.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac", size = 219844, upload-time = "2026-02-09T12:57:20.66Z" },
+ { url = "https://files.pythonhosted.org/packages/97/fd/7e859f8fab324cef6c4ad7cff156ca7c489fef9179d5749b0c8d321281c2/coverage-7.13.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea", size = 250832, upload-time = "2026-02-09T12:57:22.007Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/dc/b2442d10020c2f52617828862d8b6ee337859cd8f3a1f13d607dddda9cf7/coverage-7.13.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b", size = 253434, upload-time = "2026-02-09T12:57:23.339Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/88/6728a7ad17428b18d836540630487231f5470fb82454871149502f5e5aa2/coverage-7.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525", size = 254676, upload-time = "2026-02-09T12:57:24.774Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/bc/21244b1b8cedf0dff0a2b53b208015fe798d5f2a8d5348dbfece04224fff/coverage-7.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242", size = 256807, upload-time = "2026-02-09T12:57:26.125Z" },
+ { url = "https://files.pythonhosted.org/packages/97/a0/ddba7ed3251cff51006737a727d84e05b61517d1784a9988a846ba508877/coverage-7.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148", size = 251058, upload-time = "2026-02-09T12:57:27.614Z" },
+ { url = "https://files.pythonhosted.org/packages/9b/55/e289addf7ff54d3a540526f33751951bf0878f3809b47f6dfb3def69c6f7/coverage-7.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a", size = 252805, upload-time = "2026-02-09T12:57:29.066Z" },
+ { url = "https://files.pythonhosted.org/packages/13/4e/cc276b1fa4a59be56d96f1dabddbdc30f4ba22e3b1cd42504c37b3313255/coverage-7.13.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23", size = 250766, upload-time = "2026-02-09T12:57:30.522Z" },
+ { url = "https://files.pythonhosted.org/packages/94/44/1093b8f93018f8b41a8cf29636c9292502f05e4a113d4d107d14a3acd044/coverage-7.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80", size = 254923, upload-time = "2026-02-09T12:57:31.946Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/55/ea2796da2d42257f37dbea1aab239ba9263b31bd91d5527cdd6db5efe174/coverage-7.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea", size = 250591, upload-time = "2026-02-09T12:57:33.842Z" },
+ { url = "https://files.pythonhosted.org/packages/d4/fa/7c4bb72aacf8af5020675aa633e59c1fbe296d22aed191b6a5b711eb2bc7/coverage-7.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a", size = 252364, upload-time = "2026-02-09T12:57:35.743Z" },
+ { url = "https://files.pythonhosted.org/packages/5c/38/a8d2ec0146479c20bbaa7181b5b455a0c41101eed57f10dd19a78ab44c80/coverage-7.13.4-cp313-cp313-win32.whl", hash = "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d", size = 222010, upload-time = "2026-02-09T12:57:37.25Z" },
+ { url = "https://files.pythonhosted.org/packages/e2/0c/dbfafbe90a185943dcfbc766fe0e1909f658811492d79b741523a414a6cc/coverage-7.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd", size = 222818, upload-time = "2026-02-09T12:57:38.734Z" },
+ { url = "https://files.pythonhosted.org/packages/04/d1/934918a138c932c90d78301f45f677fb05c39a3112b96fd2c8e60503cdc7/coverage-7.13.4-cp313-cp313-win_arm64.whl", hash = "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af", size = 221438, upload-time = "2026-02-09T12:57:40.223Z" },
+ { url = "https://files.pythonhosted.org/packages/52/57/ee93ced533bcb3e6df961c0c6e42da2fc6addae53fb95b94a89b1e33ebd7/coverage-7.13.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d", size = 220165, upload-time = "2026-02-09T12:57:41.639Z" },
+ { url = "https://files.pythonhosted.org/packages/c5/e0/969fc285a6fbdda49d91af278488d904dcd7651b2693872f0ff94e40e84a/coverage-7.13.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12", size = 220516, upload-time = "2026-02-09T12:57:44.215Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/b8/9531944e16267e2735a30a9641ff49671f07e8138ecf1ca13db9fd2560c7/coverage-7.13.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b", size = 261804, upload-time = "2026-02-09T12:57:45.989Z" },
+ { url = "https://files.pythonhosted.org/packages/8a/f3/e63df6d500314a2a60390d1989240d5f27318a7a68fa30ad3806e2a9323e/coverage-7.13.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9", size = 263885, upload-time = "2026-02-09T12:57:47.42Z" },
+ { url = "https://files.pythonhosted.org/packages/f3/67/7654810de580e14b37670b60a09c599fa348e48312db5b216d730857ffe6/coverage-7.13.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092", size = 266308, upload-time = "2026-02-09T12:57:49.345Z" },
+ { url = "https://files.pythonhosted.org/packages/37/6f/39d41eca0eab3cc82115953ad41c4e77935286c930e8fad15eaed1389d83/coverage-7.13.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9", size = 267452, upload-time = "2026-02-09T12:57:50.811Z" },
+ { url = "https://files.pythonhosted.org/packages/50/6d/39c0fbb8fc5cd4d2090811e553c2108cf5112e882f82505ee7495349a6bf/coverage-7.13.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26", size = 261057, upload-time = "2026-02-09T12:57:52.447Z" },
+ { url = "https://files.pythonhosted.org/packages/a4/a2/60010c669df5fa603bb5a97fb75407e191a846510da70ac657eb696b7fce/coverage-7.13.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2", size = 263875, upload-time = "2026-02-09T12:57:53.938Z" },
+ { url = "https://files.pythonhosted.org/packages/3e/d9/63b22a6bdbd17f1f96e9ed58604c2a6b0e72a9133e37d663bef185877cf6/coverage-7.13.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940", size = 261500, upload-time = "2026-02-09T12:57:56.012Z" },
+ { url = "https://files.pythonhosted.org/packages/70/bf/69f86ba1ad85bc3ad240e4c0e57a2e620fbc0e1645a47b5c62f0e941ad7f/coverage-7.13.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c", size = 265212, upload-time = "2026-02-09T12:57:57.5Z" },
+ { url = "https://files.pythonhosted.org/packages/ae/f2/5f65a278a8c2148731831574c73e42f57204243d33bedaaf18fa79c5958f/coverage-7.13.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0", size = 260398, upload-time = "2026-02-09T12:57:59.027Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/80/6e8280a350ee9fea92f14b8357448a242dcaa243cb2c72ab0ca591f66c8c/coverage-7.13.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b", size = 262584, upload-time = "2026-02-09T12:58:01.129Z" },
+ { url = "https://files.pythonhosted.org/packages/22/63/01ff182fc95f260b539590fb12c11ad3e21332c15f9799cb5e2386f71d9f/coverage-7.13.4-cp313-cp313t-win32.whl", hash = "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9", size = 222688, upload-time = "2026-02-09T12:58:02.736Z" },
+ { url = "https://files.pythonhosted.org/packages/a9/43/89de4ef5d3cd53b886afa114065f7e9d3707bdb3e5efae13535b46ae483d/coverage-7.13.4-cp313-cp313t-win_amd64.whl", hash = "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd", size = 223746, upload-time = "2026-02-09T12:58:05.362Z" },
+ { url = "https://files.pythonhosted.org/packages/35/39/7cf0aa9a10d470a5309b38b289b9bb07ddeac5d61af9b664fe9775a4cb3e/coverage-7.13.4-cp313-cp313t-win_arm64.whl", hash = "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997", size = 222003, upload-time = "2026-02-09T12:58:06.952Z" },
+ { url = "https://files.pythonhosted.org/packages/92/11/a9cf762bb83386467737d32187756a42094927150c3e107df4cb078e8590/coverage-7.13.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601", size = 219522, upload-time = "2026-02-09T12:58:08.623Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/28/56e6d892b7b052236d67c95f1936b6a7cf7c3e2634bf27610b8cbd7f9c60/coverage-7.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689", size = 219855, upload-time = "2026-02-09T12:58:10.176Z" },
+ { url = "https://files.pythonhosted.org/packages/e5/69/233459ee9eb0c0d10fcc2fe425a029b3fa5ce0f040c966ebce851d030c70/coverage-7.13.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c", size = 250887, upload-time = "2026-02-09T12:58:12.503Z" },
+ { url = "https://files.pythonhosted.org/packages/06/90/2cdab0974b9b5bbc1623f7876b73603aecac11b8d95b85b5b86b32de5eab/coverage-7.13.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129", size = 253396, upload-time = "2026-02-09T12:58:14.615Z" },
+ { url = "https://files.pythonhosted.org/packages/ac/15/ea4da0f85bf7d7b27635039e649e99deb8173fe551096ea15017f7053537/coverage-7.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552", size = 254745, upload-time = "2026-02-09T12:58:16.162Z" },
+ { url = "https://files.pythonhosted.org/packages/99/11/bb356e86920c655ca4d61daee4e2bbc7258f0a37de0be32d233b561134ff/coverage-7.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a", size = 257055, upload-time = "2026-02-09T12:58:17.892Z" },
+ { url = "https://files.pythonhosted.org/packages/c9/0f/9ae1f8cb17029e09da06ca4e28c9e1d5c1c0a511c7074592e37e0836c915/coverage-7.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356", size = 250911, upload-time = "2026-02-09T12:58:19.495Z" },
+ { url = "https://files.pythonhosted.org/packages/89/3a/adfb68558fa815cbc29747b553bc833d2150228f251b127f1ce97e48547c/coverage-7.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71", size = 252754, upload-time = "2026-02-09T12:58:21.064Z" },
+ { url = "https://files.pythonhosted.org/packages/32/b1/540d0c27c4e748bd3cd0bd001076ee416eda993c2bae47a73b7cc9357931/coverage-7.13.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5", size = 250720, upload-time = "2026-02-09T12:58:22.622Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/95/383609462b3ffb1fe133014a7c84fc0dd01ed55ac6140fa1093b5af7ebb1/coverage-7.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98", size = 254994, upload-time = "2026-02-09T12:58:24.548Z" },
+ { url = "https://files.pythonhosted.org/packages/f7/ba/1761138e86c81680bfc3c49579d66312865457f9fe405b033184e5793cb3/coverage-7.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5", size = 250531, upload-time = "2026-02-09T12:58:26.271Z" },
+ { url = "https://files.pythonhosted.org/packages/f8/8e/05900df797a9c11837ab59c4d6fe94094e029582aab75c3309a93e6fb4e3/coverage-7.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0", size = 252189, upload-time = "2026-02-09T12:58:27.807Z" },
+ { url = "https://files.pythonhosted.org/packages/00/bd/29c9f2db9ea4ed2738b8a9508c35626eb205d51af4ab7bf56a21a2e49926/coverage-7.13.4-cp314-cp314-win32.whl", hash = "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb", size = 222258, upload-time = "2026-02-09T12:58:29.441Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/4d/1f8e723f6829977410efeb88f73673d794075091c8c7c18848d273dc9d73/coverage-7.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505", size = 223073, upload-time = "2026-02-09T12:58:31.026Z" },
+ { url = "https://files.pythonhosted.org/packages/51/5b/84100025be913b44e082ea32abcf1afbf4e872f5120b7a1cab1d331b1e13/coverage-7.13.4-cp314-cp314-win_arm64.whl", hash = "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2", size = 221638, upload-time = "2026-02-09T12:58:32.599Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/e4/c884a405d6ead1370433dad1e3720216b4f9fd8ef5b64bfd984a2a60a11a/coverage-7.13.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056", size = 220246, upload-time = "2026-02-09T12:58:34.181Z" },
+ { url = "https://files.pythonhosted.org/packages/81/5c/4d7ed8b23b233b0fffbc9dfec53c232be2e695468523242ea9fd30f97ad2/coverage-7.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc", size = 220514, upload-time = "2026-02-09T12:58:35.704Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/6f/3284d4203fd2f28edd73034968398cd2d4cb04ab192abc8cff007ea35679/coverage-7.13.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9", size = 261877, upload-time = "2026-02-09T12:58:37.864Z" },
+ { url = "https://files.pythonhosted.org/packages/09/aa/b672a647bbe1556a85337dc95bfd40d146e9965ead9cc2fe81bde1e5cbce/coverage-7.13.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf", size = 264004, upload-time = "2026-02-09T12:58:39.492Z" },
+ { url = "https://files.pythonhosted.org/packages/79/a1/aa384dbe9181f98bba87dd23dda436f0c6cf2e148aecbb4e50fc51c1a656/coverage-7.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55", size = 266408, upload-time = "2026-02-09T12:58:41.852Z" },
+ { url = "https://files.pythonhosted.org/packages/53/5e/5150bf17b4019bc600799f376bb9606941e55bd5a775dc1e096b6ffea952/coverage-7.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72", size = 267544, upload-time = "2026-02-09T12:58:44.093Z" },
+ { url = "https://files.pythonhosted.org/packages/e0/ed/f1de5c675987a4a7a672250d2c5c9d73d289dbf13410f00ed7181d8017dd/coverage-7.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a", size = 260980, upload-time = "2026-02-09T12:58:45.721Z" },
+ { url = "https://files.pythonhosted.org/packages/b3/e3/fe758d01850aa172419a6743fe76ba8b92c29d181d4f676ffe2dae2ba631/coverage-7.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6", size = 263871, upload-time = "2026-02-09T12:58:47.334Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/76/b829869d464115e22499541def9796b25312b8cf235d3bb00b39f1675395/coverage-7.13.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3", size = 261472, upload-time = "2026-02-09T12:58:48.995Z" },
+ { url = "https://files.pythonhosted.org/packages/14/9e/caedb1679e73e2f6ad240173f55218488bfe043e38da577c4ec977489915/coverage-7.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750", size = 265210, upload-time = "2026-02-09T12:58:51.178Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/10/0dd02cb009b16ede425b49ec344aba13a6ae1dc39600840ea6abcb085ac4/coverage-7.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39", size = 260319, upload-time = "2026-02-09T12:58:53.081Z" },
+ { url = "https://files.pythonhosted.org/packages/92/8e/234d2c927af27c6d7a5ffad5bd2cf31634c46a477b4c7adfbfa66baf7ebb/coverage-7.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0", size = 262638, upload-time = "2026-02-09T12:58:55.258Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/64/e5547c8ff6964e5965c35a480855911b61509cce544f4d442caa759a0702/coverage-7.13.4-cp314-cp314t-win32.whl", hash = "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea", size = 223040, upload-time = "2026-02-09T12:58:56.936Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/96/38086d58a181aac86d503dfa9c47eb20715a79c3e3acbdf786e92e5c09a8/coverage-7.13.4-cp314-cp314t-win_amd64.whl", hash = "sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932", size = 224148, upload-time = "2026-02-09T12:58:58.645Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/72/8d10abd3740a0beb98c305e0c3faf454366221c0f37a8bcf8f60020bb65a/coverage-7.13.4-cp314-cp314t-win_arm64.whl", hash = "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b", size = 222172, upload-time = "2026-02-09T12:59:00.396Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/4a/331fe2caf6799d591109bb9c08083080f6de90a823695d412a935622abb2/coverage-7.13.4-py3-none-any.whl", hash = "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0", size = 211242, upload-time = "2026-02-09T12:59:02.032Z" },
+]
+
+[package.optional-dependencies]
+toml = [
+ { name = "tomli", marker = "python_full_version <= '3.11'" },
+]
+
[[package]]
name = "debugpy"
version = "1.8.20"
@@ -375,6 +502,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" },
]
+[[package]]
+name = "distlib"
+version = "0.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" },
+]
+
[[package]]
name = "exceptiongroup"
version = "1.3.1"
@@ -414,6 +550,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
]
+[[package]]
+name = "identify"
+version = "2.6.17"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/57/84/376a3b96e5a8d33a7aa2c5b3b31a4b3c364117184bf0b17418055f6ace66/identify-2.6.17.tar.gz", hash = "sha256:f816b0b596b204c9fdf076ded172322f2723cf958d02f9c3587504834c8ff04d", size = 99579, upload-time = "2026-03-01T20:04:12.702Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/40/66/71c1227dff78aaeb942fed29dd5651f2aec166cc7c9aeea3e8b26a539b7d/identify-2.6.17-py2.py3-none-any.whl", hash = "sha256:be5f8412d5ed4b20f2bd41a65f920990bdccaa6a4a18a08f1eefdcd0bdd885f0", size = 99382, upload-time = "2026-03-01T20:04:11.439Z" },
+]
+
[[package]]
name = "idna"
version = "3.11"
@@ -732,155 +877,12 @@ wheels = [
]
[[package]]
-name = "numpy"
-version = "2.2.6"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version < '3.11'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" },
- { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" },
- { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" },
- { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" },
- { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" },
- { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" },
- { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, upload-time = "2025-05-17T21:29:51.102Z" },
- { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" },
- { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" },
- { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" },
- { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" },
- { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" },
- { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" },
- { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" },
- { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" },
- { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" },
- { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" },
- { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" },
- { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, upload-time = "2025-05-17T21:33:50.273Z" },
- { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = "2025-05-17T21:34:09.135Z" },
- { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" },
- { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" },
- { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" },
- { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" },
- { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" },
- { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" },
- { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" },
- { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" },
- { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" },
- { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" },
- { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" },
- { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" },
- { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" },
- { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" },
- { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" },
- { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" },
- { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" },
- { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" },
- { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" },
- { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" },
- { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" },
- { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" },
- { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" },
- { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" },
- { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" },
- { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" },
- { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" },
- { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" },
- { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" },
- { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" },
- { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" },
- { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" },
- { url = "https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" },
- { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" },
-]
-
-[[package]]
-name = "numpy"
-version = "2.4.2"
+name = "nodeenv"
+version = "1.10.0"
source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'win32'",
- "python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32'",
- "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = "2026-01-31T23:13:10.135Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/d3/44/71852273146957899753e69986246d6a176061ea183407e95418c2aa4d9a/numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", size = 16955478, upload-time = "2026-01-31T23:10:25.623Z" },
- { url = "https://files.pythonhosted.org/packages/74/41/5d17d4058bd0cd96bcbd4d9ff0fb2e21f52702aab9a72e4a594efa18692f/numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1", size = 14965467, upload-time = "2026-01-31T23:10:28.186Z" },
- { url = "https://files.pythonhosted.org/packages/49/48/fb1ce8136c19452ed15f033f8aee91d5defe515094e330ce368a0647846f/numpy-2.4.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7", size = 5475172, upload-time = "2026-01-31T23:10:30.848Z" },
- { url = "https://files.pythonhosted.org/packages/40/a9/3feb49f17bbd1300dd2570432961f5c8a4ffeff1db6f02c7273bd020a4c9/numpy-2.4.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73", size = 6805145, upload-time = "2026-01-31T23:10:32.352Z" },
- { url = "https://files.pythonhosted.org/packages/3f/39/fdf35cbd6d6e2fcad42fcf85ac04a85a0d0fbfbf34b30721c98d602fd70a/numpy-2.4.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1", size = 15966084, upload-time = "2026-01-31T23:10:34.502Z" },
- { url = "https://files.pythonhosted.org/packages/1b/46/6fa4ea94f1ddf969b2ee941290cca6f1bfac92b53c76ae5f44afe17ceb69/numpy-2.4.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32", size = 16899477, upload-time = "2026-01-31T23:10:37.075Z" },
- { url = "https://files.pythonhosted.org/packages/09/a1/2a424e162b1a14a5bd860a464ab4e07513916a64ab1683fae262f735ccd2/numpy-2.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390", size = 17323429, upload-time = "2026-01-31T23:10:39.704Z" },
- { url = "https://files.pythonhosted.org/packages/ce/a2/73014149ff250628df72c58204822ac01d768697913881aacf839ff78680/numpy-2.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413", size = 18635109, upload-time = "2026-01-31T23:10:41.924Z" },
- { url = "https://files.pythonhosted.org/packages/6c/0c/73e8be2f1accd56df74abc1c5e18527822067dced5ec0861b5bb882c2ce0/numpy-2.4.2-cp311-cp311-win32.whl", hash = "sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda", size = 6237915, upload-time = "2026-01-31T23:10:45.26Z" },
- { url = "https://files.pythonhosted.org/packages/76/ae/e0265e0163cf127c24c3969d29f1c4c64551a1e375d95a13d32eab25d364/numpy-2.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695", size = 12607972, upload-time = "2026-01-31T23:10:47.021Z" },
- { url = "https://files.pythonhosted.org/packages/29/a5/c43029af9b8014d6ea157f192652c50042e8911f4300f8f6ed3336bf437f/numpy-2.4.2-cp311-cp311-win_arm64.whl", hash = "sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3", size = 10485763, upload-time = "2026-01-31T23:10:50.087Z" },
- { url = "https://files.pythonhosted.org/packages/51/6e/6f394c9c77668153e14d4da83bcc247beb5952f6ead7699a1a2992613bea/numpy-2.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", size = 16667963, upload-time = "2026-01-31T23:10:52.147Z" },
- { url = "https://files.pythonhosted.org/packages/1f/f8/55483431f2b2fd015ae6ed4fe62288823ce908437ed49db5a03d15151678/numpy-2.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", size = 14693571, upload-time = "2026-01-31T23:10:54.789Z" },
- { url = "https://files.pythonhosted.org/packages/2f/20/18026832b1845cdc82248208dd929ca14c9d8f2bac391f67440707fff27c/numpy-2.4.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", size = 5203469, upload-time = "2026-01-31T23:10:57.343Z" },
- { url = "https://files.pythonhosted.org/packages/7d/33/2eb97c8a77daaba34eaa3fa7241a14ac5f51c46a6bd5911361b644c4a1e2/numpy-2.4.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", size = 6550820, upload-time = "2026-01-31T23:10:59.429Z" },
- { url = "https://files.pythonhosted.org/packages/b1/91/b97fdfd12dc75b02c44e26c6638241cc004d4079a0321a69c62f51470c4c/numpy-2.4.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", size = 15663067, upload-time = "2026-01-31T23:11:01.291Z" },
- { url = "https://files.pythonhosted.org/packages/f5/c6/a18e59f3f0b8071cc85cbc8d80cd02d68aa9710170b2553a117203d46936/numpy-2.4.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", size = 16619782, upload-time = "2026-01-31T23:11:03.669Z" },
- { url = "https://files.pythonhosted.org/packages/b7/83/9751502164601a79e18847309f5ceec0b1446d7b6aa12305759b72cf98b2/numpy-2.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", size = 17013128, upload-time = "2026-01-31T23:11:05.913Z" },
- { url = "https://files.pythonhosted.org/packages/61/c4/c4066322256ec740acc1c8923a10047818691d2f8aec254798f3dd90f5f2/numpy-2.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", size = 18345324, upload-time = "2026-01-31T23:11:08.248Z" },
- { url = "https://files.pythonhosted.org/packages/ab/af/6157aa6da728fa4525a755bfad486ae7e3f76d4c1864138003eb84328497/numpy-2.4.2-cp312-cp312-win32.whl", hash = "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", size = 5960282, upload-time = "2026-01-31T23:11:10.497Z" },
- { url = "https://files.pythonhosted.org/packages/92/0f/7ceaaeaacb40567071e94dbf2c9480c0ae453d5bb4f52bea3892c39dc83c/numpy-2.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", size = 12314210, upload-time = "2026-01-31T23:11:12.176Z" },
- { url = "https://files.pythonhosted.org/packages/2f/a3/56c5c604fae6dd40fa2ed3040d005fca97e91bd320d232ac9931d77ba13c/numpy-2.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", size = 10220171, upload-time = "2026-01-31T23:11:14.684Z" },
- { url = "https://files.pythonhosted.org/packages/a1/22/815b9fe25d1d7ae7d492152adbc7226d3eff731dffc38fe970589fcaaa38/numpy-2.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", size = 16663696, upload-time = "2026-01-31T23:11:17.516Z" },
- { url = "https://files.pythonhosted.org/packages/09/f0/817d03a03f93ba9c6c8993de509277d84e69f9453601915e4a69554102a1/numpy-2.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", size = 14688322, upload-time = "2026-01-31T23:11:19.883Z" },
- { url = "https://files.pythonhosted.org/packages/da/b4/f805ab79293c728b9a99438775ce51885fd4f31b76178767cfc718701a39/numpy-2.4.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", size = 5198157, upload-time = "2026-01-31T23:11:22.375Z" },
- { url = "https://files.pythonhosted.org/packages/74/09/826e4289844eccdcd64aac27d13b0fd3f32039915dd5b9ba01baae1f436c/numpy-2.4.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", size = 6546330, upload-time = "2026-01-31T23:11:23.958Z" },
- { url = "https://files.pythonhosted.org/packages/19/fb/cbfdbfa3057a10aea5422c558ac57538e6acc87ec1669e666d32ac198da7/numpy-2.4.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", size = 15660968, upload-time = "2026-01-31T23:11:25.713Z" },
- { url = "https://files.pythonhosted.org/packages/04/dc/46066ce18d01645541f0186877377b9371b8fa8017fa8262002b4ef22612/numpy-2.4.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", size = 16607311, upload-time = "2026-01-31T23:11:28.117Z" },
- { url = "https://files.pythonhosted.org/packages/14/d9/4b5adfc39a43fa6bf918c6d544bc60c05236cc2f6339847fc5b35e6cb5b0/numpy-2.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", size = 17012850, upload-time = "2026-01-31T23:11:30.888Z" },
- { url = "https://files.pythonhosted.org/packages/b7/20/adb6e6adde6d0130046e6fdfb7675cc62bc2f6b7b02239a09eb58435753d/numpy-2.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", size = 18334210, upload-time = "2026-01-31T23:11:33.214Z" },
- { url = "https://files.pythonhosted.org/packages/78/0e/0a73b3dff26803a8c02baa76398015ea2a5434d9b8265a7898a6028c1591/numpy-2.4.2-cp313-cp313-win32.whl", hash = "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", size = 5958199, upload-time = "2026-01-31T23:11:35.385Z" },
- { url = "https://files.pythonhosted.org/packages/43/bc/6352f343522fcb2c04dbaf94cb30cca6fd32c1a750c06ad6231b4293708c/numpy-2.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", size = 12310848, upload-time = "2026-01-31T23:11:38.001Z" },
- { url = "https://files.pythonhosted.org/packages/6e/8d/6da186483e308da5da1cc6918ce913dcfe14ffde98e710bfeff2a6158d4e/numpy-2.4.2-cp313-cp313-win_arm64.whl", hash = "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", size = 10221082, upload-time = "2026-01-31T23:11:40.392Z" },
- { url = "https://files.pythonhosted.org/packages/25/a1/9510aa43555b44781968935c7548a8926274f815de42ad3997e9e83680dd/numpy-2.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", size = 14815866, upload-time = "2026-01-31T23:11:42.495Z" },
- { url = "https://files.pythonhosted.org/packages/36/30/6bbb5e76631a5ae46e7923dd16ca9d3f1c93cfa8d4ed79a129814a9d8db3/numpy-2.4.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", size = 5325631, upload-time = "2026-01-31T23:11:44.7Z" },
- { url = "https://files.pythonhosted.org/packages/46/00/3a490938800c1923b567b3a15cd17896e68052e2145d8662aaf3e1ffc58f/numpy-2.4.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", size = 6646254, upload-time = "2026-01-31T23:11:46.341Z" },
- { url = "https://files.pythonhosted.org/packages/d3/e9/fac0890149898a9b609caa5af7455a948b544746e4b8fe7c212c8edd71f8/numpy-2.4.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", size = 15720138, upload-time = "2026-01-31T23:11:48.082Z" },
- { url = "https://files.pythonhosted.org/packages/ea/5c/08887c54e68e1e28df53709f1893ce92932cc6f01f7c3d4dc952f61ffd4e/numpy-2.4.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", size = 16655398, upload-time = "2026-01-31T23:11:50.293Z" },
- { url = "https://files.pythonhosted.org/packages/4d/89/253db0fa0e66e9129c745e4ef25631dc37d5f1314dad2b53e907b8538e6d/numpy-2.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", size = 17079064, upload-time = "2026-01-31T23:11:52.927Z" },
- { url = "https://files.pythonhosted.org/packages/2a/d5/cbade46ce97c59c6c3da525e8d95b7abe8a42974a1dc5c1d489c10433e88/numpy-2.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", size = 18379680, upload-time = "2026-01-31T23:11:55.22Z" },
- { url = "https://files.pythonhosted.org/packages/40/62/48f99ae172a4b63d981babe683685030e8a3df4f246c893ea5c6ef99f018/numpy-2.4.2-cp313-cp313t-win32.whl", hash = "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", size = 6082433, upload-time = "2026-01-31T23:11:58.096Z" },
- { url = "https://files.pythonhosted.org/packages/07/38/e054a61cfe48ad9f1ed0d188e78b7e26859d0b60ef21cd9de4897cdb5326/numpy-2.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", size = 12451181, upload-time = "2026-01-31T23:11:59.782Z" },
- { url = "https://files.pythonhosted.org/packages/6e/a4/a05c3a6418575e185dd84d0b9680b6bb2e2dc3e4202f036b7b4e22d6e9dc/numpy-2.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1", size = 10290756, upload-time = "2026-01-31T23:12:02.438Z" },
- { url = "https://files.pythonhosted.org/packages/18/88/b7df6050bf18fdcfb7046286c6535cabbdd2064a3440fca3f069d319c16e/numpy-2.4.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", size = 16663092, upload-time = "2026-01-31T23:12:04.521Z" },
- { url = "https://files.pythonhosted.org/packages/25/7a/1fee4329abc705a469a4afe6e69b1ef7e915117747886327104a8493a955/numpy-2.4.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", size = 14698770, upload-time = "2026-01-31T23:12:06.96Z" },
- { url = "https://files.pythonhosted.org/packages/fb/0b/f9e49ba6c923678ad5bc38181c08ac5e53b7a5754dbca8e581aa1a56b1ff/numpy-2.4.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", size = 5208562, upload-time = "2026-01-31T23:12:09.632Z" },
- { url = "https://files.pythonhosted.org/packages/7d/12/d7de8f6f53f9bb76997e5e4c069eda2051e3fe134e9181671c4391677bb2/numpy-2.4.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", size = 6543710, upload-time = "2026-01-31T23:12:11.969Z" },
- { url = "https://files.pythonhosted.org/packages/09/63/c66418c2e0268a31a4cf8a8b512685748200f8e8e8ec6c507ce14e773529/numpy-2.4.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", size = 15677205, upload-time = "2026-01-31T23:12:14.33Z" },
- { url = "https://files.pythonhosted.org/packages/5d/6c/7f237821c9642fb2a04d2f1e88b4295677144ca93285fd76eff3bcba858d/numpy-2.4.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", size = 16611738, upload-time = "2026-01-31T23:12:16.525Z" },
- { url = "https://files.pythonhosted.org/packages/c2/a7/39c4cdda9f019b609b5c473899d87abff092fc908cfe4d1ecb2fcff453b0/numpy-2.4.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", size = 17028888, upload-time = "2026-01-31T23:12:19.306Z" },
- { url = "https://files.pythonhosted.org/packages/da/b3/e84bb64bdfea967cc10950d71090ec2d84b49bc691df0025dddb7c26e8e3/numpy-2.4.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", size = 18339556, upload-time = "2026-01-31T23:12:21.816Z" },
- { url = "https://files.pythonhosted.org/packages/88/f5/954a291bc1192a27081706862ac62bb5920fbecfbaa302f64682aa90beed/numpy-2.4.2-cp314-cp314-win32.whl", hash = "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", size = 6006899, upload-time = "2026-01-31T23:12:24.14Z" },
- { url = "https://files.pythonhosted.org/packages/05/cb/eff72a91b2efdd1bc98b3b8759f6a1654aa87612fc86e3d87d6fe4f948c4/numpy-2.4.2-cp314-cp314-win_amd64.whl", hash = "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", size = 12443072, upload-time = "2026-01-31T23:12:26.33Z" },
- { url = "https://files.pythonhosted.org/packages/37/75/62726948db36a56428fce4ba80a115716dc4fad6a3a4352487f8bb950966/numpy-2.4.2-cp314-cp314-win_arm64.whl", hash = "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", size = 10494886, upload-time = "2026-01-31T23:12:28.488Z" },
- { url = "https://files.pythonhosted.org/packages/36/2f/ee93744f1e0661dc267e4b21940870cabfae187c092e1433b77b09b50ac4/numpy-2.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", size = 14818567, upload-time = "2026-01-31T23:12:30.709Z" },
- { url = "https://files.pythonhosted.org/packages/a7/24/6535212add7d76ff938d8bdc654f53f88d35cddedf807a599e180dcb8e66/numpy-2.4.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", size = 5328372, upload-time = "2026-01-31T23:12:32.962Z" },
- { url = "https://files.pythonhosted.org/packages/5e/9d/c48f0a035725f925634bf6b8994253b43f2047f6778a54147d7e213bc5a7/numpy-2.4.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", size = 6649306, upload-time = "2026-01-31T23:12:34.797Z" },
- { url = "https://files.pythonhosted.org/packages/81/05/7c73a9574cd4a53a25907bad38b59ac83919c0ddc8234ec157f344d57d9a/numpy-2.4.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", size = 15722394, upload-time = "2026-01-31T23:12:36.565Z" },
- { url = "https://files.pythonhosted.org/packages/35/fa/4de10089f21fc7d18442c4a767ab156b25c2a6eaf187c0db6d9ecdaeb43f/numpy-2.4.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", size = 16653343, upload-time = "2026-01-31T23:12:39.188Z" },
- { url = "https://files.pythonhosted.org/packages/b8/f9/d33e4ffc857f3763a57aa85650f2e82486832d7492280ac21ba9efda80da/numpy-2.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", size = 17078045, upload-time = "2026-01-31T23:12:42.041Z" },
- { url = "https://files.pythonhosted.org/packages/c8/b8/54bdb43b6225badbea6389fa038c4ef868c44f5890f95dd530a218706da3/numpy-2.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", size = 18380024, upload-time = "2026-01-31T23:12:44.331Z" },
- { url = "https://files.pythonhosted.org/packages/a5/55/6e1a61ded7af8df04016d81b5b02daa59f2ea9252ee0397cb9f631efe9e5/numpy-2.4.2-cp314-cp314t-win32.whl", hash = "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", size = 6153937, upload-time = "2026-01-31T23:12:47.229Z" },
- { url = "https://files.pythonhosted.org/packages/45/aa/fa6118d1ed6d776b0983f3ceac9b1a5558e80df9365b1c3aa6d42bf9eee4/numpy-2.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", size = 12631844, upload-time = "2026-01-31T23:12:48.997Z" },
- { url = "https://files.pythonhosted.org/packages/32/0a/2ec5deea6dcd158f254a7b372fb09cfba5719419c8d66343bab35237b3fb/numpy-2.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", size = 10565379, upload-time = "2026-01-31T23:12:51.345Z" },
- { url = "https://files.pythonhosted.org/packages/f4/f8/50e14d36d915ef64d8f8bc4a087fc8264d82c785eda6711f80ab7e620335/numpy-2.4.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082", size = 16833179, upload-time = "2026-01-31T23:12:53.5Z" },
- { url = "https://files.pythonhosted.org/packages/17/17/809b5cad63812058a8189e91a1e2d55a5a18fd04611dbad244e8aeae465c/numpy-2.4.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a", size = 14889755, upload-time = "2026-01-31T23:12:55.933Z" },
- { url = "https://files.pythonhosted.org/packages/3e/ea/181b9bcf7627fc8371720316c24db888dcb9829b1c0270abf3d288b2e29b/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920", size = 5399500, upload-time = "2026-01-31T23:12:58.671Z" },
- { url = "https://files.pythonhosted.org/packages/33/9f/413adf3fc955541ff5536b78fcf0754680b3c6d95103230252a2c9408d23/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821", size = 6714252, upload-time = "2026-01-31T23:13:00.518Z" },
- { url = "https://files.pythonhosted.org/packages/91/da/643aad274e29ccbdf42ecd94dafe524b81c87bcb56b83872d54827f10543/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb", size = 15797142, upload-time = "2026-01-31T23:13:02.219Z" },
- { url = "https://files.pythonhosted.org/packages/66/27/965b8525e9cb5dc16481b30a1b3c21e50c7ebf6e9dbd48d0c4d0d5089c7e/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0", size = 16727979, upload-time = "2026-01-31T23:13:04.62Z" },
- { url = "https://files.pythonhosted.org/packages/de/e5/b7d20451657664b07986c2f6e3be564433f5dcaf3482d68eaecd79afaf03/numpy-2.4.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", size = 12502577, upload-time = "2026-01-31T23:13:07.08Z" },
+sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611, upload-time = "2025-12-20T14:08:54.006Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
]
[[package]]
@@ -985,138 +987,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
]
-[[package]]
-name = "pandas"
-version = "2.3.3"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version < '3.11'",
-]
-dependencies = [
- { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
- { name = "python-dateutil", marker = "python_full_version < '3.11'" },
- { name = "pytz", marker = "python_full_version < '3.11'" },
- { name = "tzdata", marker = "python_full_version < '3.11'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/3d/f7/f425a00df4fcc22b292c6895c6831c0c8ae1d9fac1e024d16f98a9ce8749/pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c", size = 11555763, upload-time = "2025-09-29T23:16:53.287Z" },
- { url = "https://files.pythonhosted.org/packages/13/4f/66d99628ff8ce7857aca52fed8f0066ce209f96be2fede6cef9f84e8d04f/pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a", size = 10801217, upload-time = "2025-09-29T23:17:04.522Z" },
- { url = "https://files.pythonhosted.org/packages/1d/03/3fc4a529a7710f890a239cc496fc6d50ad4a0995657dccc1d64695adb9f4/pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1", size = 12148791, upload-time = "2025-09-29T23:17:18.444Z" },
- { url = "https://files.pythonhosted.org/packages/40/a8/4dac1f8f8235e5d25b9955d02ff6f29396191d4e665d71122c3722ca83c5/pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838", size = 12769373, upload-time = "2025-09-29T23:17:35.846Z" },
- { url = "https://files.pythonhosted.org/packages/df/91/82cc5169b6b25440a7fc0ef3a694582418d875c8e3ebf796a6d6470aa578/pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250", size = 13200444, upload-time = "2025-09-29T23:17:49.341Z" },
- { url = "https://files.pythonhosted.org/packages/10/ae/89b3283800ab58f7af2952704078555fa60c807fff764395bb57ea0b0dbd/pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4", size = 13858459, upload-time = "2025-09-29T23:18:03.722Z" },
- { url = "https://files.pythonhosted.org/packages/85/72/530900610650f54a35a19476eca5104f38555afccda1aa11a92ee14cb21d/pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826", size = 11346086, upload-time = "2025-09-29T23:18:18.505Z" },
- { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" },
- { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" },
- { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" },
- { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" },
- { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" },
- { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" },
- { url = "https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702, upload-time = "2025-09-29T23:19:38.296Z" },
- { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload-time = "2025-09-29T23:19:48.856Z" },
- { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload-time = "2025-09-29T23:39:08.659Z" },
- { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload-time = "2025-09-29T23:19:59.765Z" },
- { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693, upload-time = "2025-09-29T23:20:14.098Z" },
- { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002, upload-time = "2025-09-29T23:20:26.76Z" },
- { url = "https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971, upload-time = "2025-09-29T23:20:41.344Z" },
- { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" },
- { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671, upload-time = "2025-09-29T23:21:05.024Z" },
- { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807, upload-time = "2025-09-29T23:21:15.979Z" },
- { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872, upload-time = "2025-09-29T23:21:27.165Z" },
- { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371, upload-time = "2025-09-29T23:21:40.532Z" },
- { url = "https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333, upload-time = "2025-09-29T23:21:55.77Z" },
- { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120, upload-time = "2025-09-29T23:22:10.109Z" },
- { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991, upload-time = "2025-09-29T23:25:04.889Z" },
- { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227, upload-time = "2025-09-29T23:22:24.343Z" },
- { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056, upload-time = "2025-09-29T23:22:37.762Z" },
- { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189, upload-time = "2025-09-29T23:22:51.688Z" },
- { url = "https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912, upload-time = "2025-09-29T23:23:05.042Z" },
- { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160, upload-time = "2025-09-29T23:23:28.57Z" },
- { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233, upload-time = "2025-09-29T23:24:24.876Z" },
- { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635, upload-time = "2025-09-29T23:25:52.486Z" },
- { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079, upload-time = "2025-09-29T23:26:33.204Z" },
- { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049, upload-time = "2025-09-29T23:27:15.384Z" },
- { url = "https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638, upload-time = "2025-09-29T23:27:51.625Z" },
- { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834, upload-time = "2025-09-29T23:28:21.289Z" },
- { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925, upload-time = "2025-09-29T23:28:58.261Z" },
- { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071, upload-time = "2025-09-29T23:32:27.484Z" },
- { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504, upload-time = "2025-09-29T23:29:31.47Z" },
- { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702, upload-time = "2025-09-29T23:29:54.591Z" },
- { url = "https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535, upload-time = "2025-09-29T23:30:21.003Z" },
- { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582, upload-time = "2025-09-29T23:30:43.391Z" },
- { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963, upload-time = "2025-09-29T23:31:10.009Z" },
- { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" },
-]
-
-[[package]]
-name = "pandas"
-version = "3.0.1"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
- "python_full_version >= '3.14' and sys_platform == 'win32'",
- "python_full_version >= '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
- "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'win32'",
- "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform == 'emscripten'",
- "python_full_version >= '3.11' and python_full_version < '3.14' and sys_platform != 'emscripten' and sys_platform != 'win32'",
-]
-dependencies = [
- { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
- { name = "python-dateutil", marker = "python_full_version >= '3.11'" },
- { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/2e/0c/b28ed414f080ee0ad153f848586d61d1878f91689950f037f976ce15f6c8/pandas-3.0.1.tar.gz", hash = "sha256:4186a699674af418f655dbd420ed87f50d56b4cd6603784279d9eef6627823c8", size = 4641901, upload-time = "2026-02-17T22:20:16.434Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/ff/07/c7087e003ceee9b9a82539b40414ec557aa795b584a1a346e89180853d79/pandas-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de09668c1bf3b925c07e5762291602f0d789eca1b3a781f99c1c78f6cac0e7ea", size = 10323380, upload-time = "2026-02-17T22:18:16.133Z" },
- { url = "https://files.pythonhosted.org/packages/c1/27/90683c7122febeefe84a56f2cde86a9f05f68d53885cebcc473298dfc33e/pandas-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:24ba315ba3d6e5806063ac6eb717504e499ce30bd8c236d8693a5fd3f084c796", size = 9923455, upload-time = "2026-02-17T22:18:19.13Z" },
- { url = "https://files.pythonhosted.org/packages/0e/f1/ed17d927f9950643bc7631aa4c99ff0cc83a37864470bc419345b656a41f/pandas-3.0.1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:406ce835c55bac912f2a0dcfaf27c06d73c6b04a5dde45f1fd3169ce31337389", size = 10753464, upload-time = "2026-02-17T22:18:21.134Z" },
- { url = "https://files.pythonhosted.org/packages/2e/7c/870c7e7daec2a6c7ff2ac9e33b23317230d4e4e954b35112759ea4a924a7/pandas-3.0.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:830994d7e1f31dd7e790045235605ab61cff6c94defc774547e8b7fdfbff3dc7", size = 11255234, upload-time = "2026-02-17T22:18:24.175Z" },
- { url = "https://files.pythonhosted.org/packages/5c/39/3653fe59af68606282b989c23d1a543ceba6e8099cbcc5f1d506a7bae2aa/pandas-3.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a64ce8b0f2de1d2efd2ae40b0abe7f8ae6b29fbfb3812098ed5a6f8e235ad9bf", size = 11767299, upload-time = "2026-02-17T22:18:26.824Z" },
- { url = "https://files.pythonhosted.org/packages/9b/31/1daf3c0c94a849c7a8dab8a69697b36d313b229918002ba3e409265c7888/pandas-3.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9832c2c69da24b602c32e0c7b1b508a03949c18ba08d4d9f1c1033426685b447", size = 12333292, upload-time = "2026-02-17T22:18:28.996Z" },
- { url = "https://files.pythonhosted.org/packages/1f/67/af63f83cd6ca603a00fe8530c10a60f0879265b8be00b5930e8e78c5b30b/pandas-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:84f0904a69e7365f79a0c77d3cdfccbfb05bf87847e3a51a41e1426b0edb9c79", size = 9892176, upload-time = "2026-02-17T22:18:31.79Z" },
- { url = "https://files.pythonhosted.org/packages/79/ab/9c776b14ac4b7b4140788eca18468ea39894bc7340a408f1d1e379856a6b/pandas-3.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:4a68773d5a778afb31d12e34f7dd4612ab90de8c6fb1d8ffe5d4a03b955082a1", size = 9151328, upload-time = "2026-02-17T22:18:35.721Z" },
- { url = "https://files.pythonhosted.org/packages/37/51/b467209c08dae2c624873d7491ea47d2b47336e5403309d433ea79c38571/pandas-3.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:476f84f8c20c9f5bc47252b66b4bb25e1a9fc2fa98cead96744d8116cb85771d", size = 10344357, upload-time = "2026-02-17T22:18:38.262Z" },
- { url = "https://files.pythonhosted.org/packages/7c/f1/e2567ffc8951ab371db2e40b2fe068e36b81d8cf3260f06ae508700e5504/pandas-3.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ab749dfba921edf641d4036c4c21c0b3ea70fea478165cb98a998fb2a261955", size = 9884543, upload-time = "2026-02-17T22:18:41.476Z" },
- { url = "https://files.pythonhosted.org/packages/d7/39/327802e0b6d693182403c144edacbc27eb82907b57062f23ef5a4c4a5ea7/pandas-3.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8e36891080b87823aff3640c78649b91b8ff6eea3c0d70aeabd72ea43ab069b", size = 10396030, upload-time = "2026-02-17T22:18:43.822Z" },
- { url = "https://files.pythonhosted.org/packages/3d/fe/89d77e424365280b79d99b3e1e7d606f5165af2f2ecfaf0c6d24c799d607/pandas-3.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:532527a701281b9dd371e2f582ed9094f4c12dd9ffb82c0c54ee28d8ac9520c4", size = 10876435, upload-time = "2026-02-17T22:18:45.954Z" },
- { url = "https://files.pythonhosted.org/packages/b5/a6/2a75320849dd154a793f69c951db759aedb8d1dd3939eeacda9bdcfa1629/pandas-3.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:356e5c055ed9b0da1580d465657bc7d00635af4fd47f30afb23025352ba764d1", size = 11405133, upload-time = "2026-02-17T22:18:48.533Z" },
- { url = "https://files.pythonhosted.org/packages/58/53/1d68fafb2e02d7881df66aa53be4cd748d25cbe311f3b3c85c93ea5d30ca/pandas-3.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9d810036895f9ad6345b8f2a338dd6998a74e8483847403582cab67745bff821", size = 11932065, upload-time = "2026-02-17T22:18:50.837Z" },
- { url = "https://files.pythonhosted.org/packages/75/08/67cc404b3a966b6df27b38370ddd96b3b023030b572283d035181854aac5/pandas-3.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:536232a5fe26dd989bd633e7a0c450705fdc86a207fec7254a55e9a22950fe43", size = 9741627, upload-time = "2026-02-17T22:18:53.905Z" },
- { url = "https://files.pythonhosted.org/packages/86/4f/caf9952948fb00d23795f09b893d11f1cacb384e666854d87249530f7cbe/pandas-3.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f463ebfd8de7f326d38037c7363c6dacb857c5881ab8961fb387804d6daf2f7", size = 9052483, upload-time = "2026-02-17T22:18:57.31Z" },
- { url = "https://files.pythonhosted.org/packages/0b/48/aad6ec4f8d007534c091e9a7172b3ec1b1ee6d99a9cbb936b5eab6c6cf58/pandas-3.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5272627187b5d9c20e55d27caf5f2cd23e286aba25cadf73c8590e432e2b7262", size = 10317509, upload-time = "2026-02-17T22:18:59.498Z" },
- { url = "https://files.pythonhosted.org/packages/a8/14/5990826f779f79148ae9d3a2c39593dc04d61d5d90541e71b5749f35af95/pandas-3.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:661e0f665932af88c7877f31da0dc743fe9c8f2524bdffe23d24fdcb67ef9d56", size = 9860561, upload-time = "2026-02-17T22:19:02.265Z" },
- { url = "https://files.pythonhosted.org/packages/fa/80/f01ff54664b6d70fed71475543d108a9b7c888e923ad210795bef04ffb7d/pandas-3.0.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75e6e292ff898679e47a2199172593d9f6107fd2dd3617c22c2946e97d5df46e", size = 10365506, upload-time = "2026-02-17T22:19:05.017Z" },
- { url = "https://files.pythonhosted.org/packages/f2/85/ab6d04733a7d6ff32bfc8382bf1b07078228f5d6ebec5266b91bfc5c4ff7/pandas-3.0.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1ff8cf1d2896e34343197685f432450ec99a85ba8d90cce2030c5eee2ef98791", size = 10873196, upload-time = "2026-02-17T22:19:07.204Z" },
- { url = "https://files.pythonhosted.org/packages/48/a9/9301c83d0b47c23ac5deab91c6b39fd98d5b5db4d93b25df8d381451828f/pandas-3.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eca8b4510f6763f3d37359c2105df03a7a221a508f30e396a51d0713d462e68a", size = 11370859, upload-time = "2026-02-17T22:19:09.436Z" },
- { url = "https://files.pythonhosted.org/packages/59/fe/0c1fc5bd2d29c7db2ab372330063ad555fb83e08422829c785f5ec2176ca/pandas-3.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:06aff2ad6f0b94a17822cf8b83bbb563b090ed82ff4fe7712db2ce57cd50d9b8", size = 11924584, upload-time = "2026-02-17T22:19:11.562Z" },
- { url = "https://files.pythonhosted.org/packages/d6/7d/216a1588b65a7aa5f4535570418a599d943c85afb1d95b0876fc00aa1468/pandas-3.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:9fea306c783e28884c29057a1d9baa11a349bbf99538ec1da44c8476563d1b25", size = 9742769, upload-time = "2026-02-17T22:19:13.926Z" },
- { url = "https://files.pythonhosted.org/packages/c4/cb/810a22a6af9a4e97c8ab1c946b47f3489c5bca5adc483ce0ffc84c9cc768/pandas-3.0.1-cp313-cp313-win_arm64.whl", hash = "sha256:a8d37a43c52917427e897cb2e429f67a449327394396a81034a4449b99afda59", size = 9043855, upload-time = "2026-02-17T22:19:16.09Z" },
- { url = "https://files.pythonhosted.org/packages/92/fa/423c89086cca1f039cf1253c3ff5b90f157b5b3757314aa635f6bf3e30aa/pandas-3.0.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d54855f04f8246ed7b6fc96b05d4871591143c46c0b6f4af874764ed0d2d6f06", size = 10752673, upload-time = "2026-02-17T22:19:18.304Z" },
- { url = "https://files.pythonhosted.org/packages/22/23/b5a08ec1f40020397f0faba72f1e2c11f7596a6169c7b3e800abff0e433f/pandas-3.0.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e1b677accee34a09e0dc2ce5624e4a58a1870ffe56fc021e9caf7f23cd7668f", size = 10404967, upload-time = "2026-02-17T22:19:20.726Z" },
- { url = "https://files.pythonhosted.org/packages/5c/81/94841f1bb4afdc2b52a99daa895ac2c61600bb72e26525ecc9543d453ebc/pandas-3.0.1-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9cabbdcd03f1b6cd254d6dda8ae09b0252524be1592594c00b7895916cb1324", size = 10320575, upload-time = "2026-02-17T22:19:24.919Z" },
- { url = "https://files.pythonhosted.org/packages/0a/8b/2ae37d66a5342a83adadfd0cb0b4bf9c3c7925424dd5f40d15d6cfaa35ee/pandas-3.0.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ae2ab1f166668b41e770650101e7090824fd34d17915dd9cd479f5c5e0065e9", size = 10710921, upload-time = "2026-02-17T22:19:27.181Z" },
- { url = "https://files.pythonhosted.org/packages/a2/61/772b2e2757855e232b7ccf7cb8079a5711becb3a97f291c953def15a833f/pandas-3.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6bf0603c2e30e2cafac32807b06435f28741135cb8697eae8b28c7d492fc7d76", size = 11334191, upload-time = "2026-02-17T22:19:29.411Z" },
- { url = "https://files.pythonhosted.org/packages/1b/08/b16c6df3ef555d8495d1d265a7963b65be166785d28f06a350913a4fac78/pandas-3.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6c426422973973cae1f4a23e51d4ae85974f44871b24844e4f7de752dd877098", size = 11782256, upload-time = "2026-02-17T22:19:32.34Z" },
- { url = "https://files.pythonhosted.org/packages/55/80/178af0594890dee17e239fca96d3d8670ba0f5ff59b7d0439850924a9c09/pandas-3.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b03f91ae8c10a85c1613102c7bef5229b5379f343030a3ccefeca8a33414cf35", size = 10485047, upload-time = "2026-02-17T22:19:34.605Z" },
- { url = "https://files.pythonhosted.org/packages/bb/8b/4bb774a998b97e6c2fd62a9e6cfdaae133b636fd1c468f92afb4ae9a447a/pandas-3.0.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:99d0f92ed92d3083d140bf6b97774f9f13863924cf3f52a70711f4e7588f9d0a", size = 10322465, upload-time = "2026-02-17T22:19:36.803Z" },
- { url = "https://files.pythonhosted.org/packages/72/3a/5b39b51c64159f470f1ca3b1c2a87da290657ca022f7cd11442606f607d1/pandas-3.0.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3b66857e983208654294bb6477b8a63dee26b37bdd0eb34d010556e91261784f", size = 9910632, upload-time = "2026-02-17T22:19:39.001Z" },
- { url = "https://files.pythonhosted.org/packages/4e/f7/b449ffb3f68c11da12fc06fbf6d2fa3a41c41e17d0284d23a79e1c13a7e4/pandas-3.0.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56cf59638bf24dc9bdf2154c81e248b3289f9a09a6d04e63608c159022352749", size = 10440535, upload-time = "2026-02-17T22:19:41.157Z" },
- { url = "https://files.pythonhosted.org/packages/55/77/6ea82043db22cb0f2bbfe7198da3544000ddaadb12d26be36e19b03a2dc5/pandas-3.0.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1a9f55e0f46951874b863d1f3906dcb57df2d9be5c5847ba4dfb55b2c815249", size = 10893940, upload-time = "2026-02-17T22:19:43.493Z" },
- { url = "https://files.pythonhosted.org/packages/03/30/f1b502a72468c89412c1b882a08f6eed8a4ee9dc033f35f65d0663df6081/pandas-3.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1849f0bba9c8a2fb0f691d492b834cc8dadf617e29015c66e989448d58d011ee", size = 11442711, upload-time = "2026-02-17T22:19:46.074Z" },
- { url = "https://files.pythonhosted.org/packages/0d/f0/ebb6ddd8fc049e98cabac5c2924d14d1dda26a20adb70d41ea2e428d3ec4/pandas-3.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c3d288439e11b5325b02ae6e9cc83e6805a62c40c5a6220bea9beb899c073b1c", size = 11963918, upload-time = "2026-02-17T22:19:48.838Z" },
- { url = "https://files.pythonhosted.org/packages/09/f8/8ce132104074f977f907442790eaae24e27bce3b3b454e82faa3237ff098/pandas-3.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:93325b0fe372d192965f4cca88d97667f49557398bbf94abdda3bf1b591dbe66", size = 9862099, upload-time = "2026-02-17T22:19:51.081Z" },
- { url = "https://files.pythonhosted.org/packages/e6/b7/6af9aac41ef2456b768ef0ae60acf8abcebb450a52043d030a65b4b7c9bd/pandas-3.0.1-cp314-cp314-win_arm64.whl", hash = "sha256:97ca08674e3287c7148f4858b01136f8bdfe7202ad25ad04fec602dd1d29d132", size = 9185333, upload-time = "2026-02-17T22:19:53.266Z" },
- { url = "https://files.pythonhosted.org/packages/66/fc/848bb6710bc6061cb0c5badd65b92ff75c81302e0e31e496d00029fe4953/pandas-3.0.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:58eeb1b2e0fb322befcf2bbc9ba0af41e616abadb3d3414a6bc7167f6cbfce32", size = 10772664, upload-time = "2026-02-17T22:19:55.806Z" },
- { url = "https://files.pythonhosted.org/packages/69/5c/866a9bbd0f79263b4b0db6ec1a341be13a1473323f05c122388e0f15b21d/pandas-3.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cd9af1276b5ca9e298bd79a26bda32fa9cc87ed095b2a9a60978d2ca058eaf87", size = 10421286, upload-time = "2026-02-17T22:19:58.091Z" },
- { url = "https://files.pythonhosted.org/packages/51/a4/2058fb84fb1cfbfb2d4a6d485e1940bb4ad5716e539d779852494479c580/pandas-3.0.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f87a04984d6b63788327cd9f79dda62b7f9043909d2440ceccf709249ca988", size = 10342050, upload-time = "2026-02-17T22:20:01.376Z" },
- { url = "https://files.pythonhosted.org/packages/22/1b/674e89996cc4be74db3c4eb09240c4bb549865c9c3f5d9b086ff8fcfbf00/pandas-3.0.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85fe4c4df62e1e20f9db6ebfb88c844b092c22cd5324bdcf94bfa2fc1b391221", size = 10740055, upload-time = "2026-02-17T22:20:04.328Z" },
- { url = "https://files.pythonhosted.org/packages/d0/f8/e954b750764298c22fa4614376531fe63c521ef517e7059a51f062b87dca/pandas-3.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:331ca75a2f8672c365ae25c0b29e46f5ac0c6551fdace8eec4cd65e4fac271ff", size = 11357632, upload-time = "2026-02-17T22:20:06.647Z" },
- { url = "https://files.pythonhosted.org/packages/6d/02/c6e04b694ffd68568297abd03588b6d30295265176a5c01b7459d3bc35a3/pandas-3.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:15860b1fdb1973fffade772fdb931ccf9b2f400a3f5665aef94a00445d7d8dd5", size = 11810974, upload-time = "2026-02-17T22:20:08.946Z" },
- { url = "https://files.pythonhosted.org/packages/89/41/d7dfb63d2407f12055215070c42fc6ac41b66e90a2946cdc5e759058398b/pandas-3.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:44f1364411d5670efa692b146c748f4ed013df91ee91e9bec5677fb1fd58b937", size = 10884622, upload-time = "2026-02-17T22:20:11.711Z" },
- { url = "https://files.pythonhosted.org/packages/68/b0/34937815889fa982613775e4b97fddd13250f11012d769949c5465af2150/pandas-3.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:108dd1790337a494aa80e38def654ca3f0968cf4f362c85f44c15e471667102d", size = 9452085, upload-time = "2026-02-17T22:20:14.331Z" },
-]
-
[[package]]
name = "parso"
version = "0.8.6"
@@ -1184,6 +1054,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" },
]
+[[package]]
+name = "pre-commit"
+version = "4.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cfgv" },
+ { name = "identify" },
+ { name = "nodeenv" },
+ { name = "pyyaml" },
+ { name = "virtualenv" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/f1/6d86a29246dfd2e9b6237f0b5823717f60cad94d47ddc26afa916d21f525/pre_commit-4.5.1.tar.gz", hash = "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61", size = 198232, upload-time = "2025-12-16T21:14:33.552Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl", hash = "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", size = 226437, upload-time = "2025-12-16T21:14:32.409Z" },
+]
+
[[package]]
name = "prompt-toolkit"
version = "3.0.52"
@@ -1492,6 +1378,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "coverage", extra = ["toml"] },
+ { name = "pluggy" },
+ { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
+]
+
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -1505,12 +1405,80 @@ wheels = [
]
[[package]]
-name = "pytz"
-version = "2025.2"
+name = "python-discovery"
+version = "1.1.3"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
+dependencies = [
+ { name = "filelock" },
+ { name = "platformdirs" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d7/7e/9f3b0dd3a074a6c3e1e79f35e465b1f2ee4b262d619de00cfce523cc9b24/python_discovery-1.1.3.tar.gz", hash = "sha256:7acca36e818cd88e9b2ba03e045ad7e93e1713e29c6bbfba5d90202310b7baa5", size = 56945, upload-time = "2026-03-10T15:08:15.038Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e7/80/73211fc5bfbfc562369b4aa61dc1e4bf07dc7b34df7b317e4539316b809c/python_discovery-1.1.3-py3-none-any.whl", hash = "sha256:90e795f0121bc84572e737c9aa9966311b9fde44ffb88a5953b3ec9b31c6945e", size = 31485, upload-time = "2026-03-10T15:08:13.06Z" },
+]
+
+[[package]]
+name = "pyyaml"
+version = "6.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227, upload-time = "2025-09-25T21:31:46.04Z" },
+ { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019, upload-time = "2025-09-25T21:31:47.706Z" },
+ { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646, upload-time = "2025-09-25T21:31:49.21Z" },
+ { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793, upload-time = "2025-09-25T21:31:50.735Z" },
+ { url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293, upload-time = "2025-09-25T21:31:51.828Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872, upload-time = "2025-09-25T21:31:53.282Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828, upload-time = "2025-09-25T21:31:54.807Z" },
+ { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415, upload-time = "2025-09-25T21:31:55.885Z" },
+ { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561, upload-time = "2025-09-25T21:31:57.406Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" },
+ { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" },
+ { url = "https://files.pythonhosted.org/packages/10/cb/16c3f2cf3266edd25aaa00d6c4350381c8b012ed6f5276675b9eba8d9ff4/pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00", size = 882114, upload-time = "2025-09-25T21:32:03.376Z" },
+ { url = "https://files.pythonhosted.org/packages/71/60/917329f640924b18ff085ab889a11c763e0b573da888e8404ff486657602/pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d", size = 806638, upload-time = "2025-09-25T21:32:04.553Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/6f/529b0f316a9fd167281a6c3826b5583e6192dba792dd55e3203d3f8e655a/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a", size = 767463, upload-time = "2025-09-25T21:32:06.152Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/6a/b627b4e0c1dd03718543519ffb2f1deea4a1e6d42fbab8021936a4d22589/pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4", size = 794986, upload-time = "2025-09-25T21:32:07.367Z" },
+ { url = "https://files.pythonhosted.org/packages/45/91/47a6e1c42d9ee337c4839208f30d9f09caa9f720ec7582917b264defc875/pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b", size = 142543, upload-time = "2025-09-25T21:32:08.95Z" },
+ { url = "https://files.pythonhosted.org/packages/da/e3/ea007450a105ae919a72393cb06f122f288ef60bba2dc64b26e2646fa315/pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf", size = 158763, upload-time = "2025-09-25T21:32:09.96Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" },
+ { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" },
+ { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" },
+ { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" },
+ { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" },
+ { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" },
+ { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" },
+ { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" },
+ { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" },
+ { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" },
+ { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" },
+ { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" },
+ { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" },
+ { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" },
+ { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" },
+ { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" },
+ { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" },
+ { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" },
+ { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" },
+ { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" },
+ { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" },
+ { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" },
+ { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" },
+ { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" },
+ { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" },
+ { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" },
+ { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" },
+ { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" },
+ { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" },
+ { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
]
[[package]]
@@ -1626,6 +1594,31 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
]
+[[package]]
+name = "ruff"
+version = "0.15.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/51/df/f8629c19c5318601d3121e230f74cbee7a3732339c52b21daa2b82ef9c7d/ruff-0.15.6.tar.gz", hash = "sha256:8394c7bb153a4e3811a4ecdacd4a8e6a4fa8097028119160dffecdcdf9b56ae4", size = 4597916, upload-time = "2026-03-12T23:05:47.51Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9e/2f/4e03a7e5ce99b517e98d3b4951f411de2b0fa8348d39cf446671adcce9a2/ruff-0.15.6-py3-none-linux_armv6l.whl", hash = "sha256:7c98c3b16407b2cf3d0f2b80c80187384bc92c6774d85fefa913ecd941256fff", size = 10508953, upload-time = "2026-03-12T23:05:17.246Z" },
+ { url = "https://files.pythonhosted.org/packages/70/60/55bcdc3e9f80bcf39edf0cd272da6fa511a3d94d5a0dd9e0adf76ceebdb4/ruff-0.15.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee7dcfaad8b282a284df4aa6ddc2741b3f4a18b0555d626805555a820ea181c3", size = 10942257, upload-time = "2026-03-12T23:05:23.076Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/f9/005c29bd1726c0f492bfa215e95154cf480574140cb5f867c797c18c790b/ruff-0.15.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3bd9967851a25f038fc8b9ae88a7fbd1b609f30349231dffaa37b6804923c4bb", size = 10322683, upload-time = "2026-03-12T23:05:33.738Z" },
+ { url = "https://files.pythonhosted.org/packages/5f/74/2f861f5fd7cbb2146bddb5501450300ce41562da36d21868c69b7a828169/ruff-0.15.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13f4594b04e42cd24a41da653886b04d2ff87adbf57497ed4f728b0e8a4866f8", size = 10660986, upload-time = "2026-03-12T23:05:53.245Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/a1/309f2364a424eccb763cdafc49df843c282609f47fe53aa83f38272389e0/ruff-0.15.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2ed8aea2f3fe57886d3f00ea5b8aae5bf68d5e195f487f037a955ff9fbaac9e", size = 10332177, upload-time = "2026-03-12T23:05:56.145Z" },
+ { url = "https://files.pythonhosted.org/packages/30/41/7ebf1d32658b4bab20f8ac80972fb19cd4e2c6b78552be263a680edc55ac/ruff-0.15.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70789d3e7830b848b548aae96766431c0dc01a6c78c13381f423bf7076c66d15", size = 11170783, upload-time = "2026-03-12T23:06:01.742Z" },
+ { url = "https://files.pythonhosted.org/packages/76/be/6d488f6adca047df82cd62c304638bcb00821c36bd4881cfca221561fdfc/ruff-0.15.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:542aaf1de3154cea088ced5a819ce872611256ffe2498e750bbae5247a8114e9", size = 12044201, upload-time = "2026-03-12T23:05:28.697Z" },
+ { url = "https://files.pythonhosted.org/packages/71/68/e6f125df4af7e6d0b498f8d373274794bc5156b324e8ab4bf5c1b4fc0ec7/ruff-0.15.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c22e6f02c16cfac3888aa636e9eba857254d15bbacc9906c9689fdecb1953ab", size = 11421561, upload-time = "2026-03-12T23:05:31.236Z" },
+ { url = "https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e", size = 11310928, upload-time = "2026-03-12T23:05:45.288Z" },
+ { url = "https://files.pythonhosted.org/packages/8c/26/b75f8c421f5654304b89471ed384ae8c7f42b4dff58fa6ce1626d7f2b59a/ruff-0.15.6-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:70d263770d234912374493e8cc1e7385c5d49376e41dfa51c5c3453169dc581c", size = 11235186, upload-time = "2026-03-12T23:05:50.677Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/d4/d5a6d065962ff7a68a86c9b4f5500f7d101a0792078de636526c0edd40da/ruff-0.15.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:55a1ad63c5a6e54b1f21b7514dfadc0c7fb40093fa22e95143cf3f64ebdcd512", size = 10635231, upload-time = "2026-03-12T23:05:37.044Z" },
+ { url = "https://files.pythonhosted.org/packages/d6/56/7c3acf3d50910375349016cf33de24be021532042afbed87942858992491/ruff-0.15.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8dc473ba093c5ec238bb1e7429ee676dca24643c471e11fbaa8a857925b061c0", size = 10340357, upload-time = "2026-03-12T23:06:04.748Z" },
+ { url = "https://files.pythonhosted.org/packages/06/54/6faa39e9c1033ff6a3b6e76b5df536931cd30caf64988e112bbf91ef5ce5/ruff-0.15.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:85b042377c2a5561131767974617006f99f7e13c63c111b998f29fc1e58a4cfb", size = 10860583, upload-time = "2026-03-12T23:05:58.978Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/1e/509a201b843b4dfb0b32acdedf68d951d3377988cae43949ba4c4133a96a/ruff-0.15.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cef49e30bc5a86a6a92098a7fbf6e467a234d90b63305d6f3ec01225a9d092e0", size = 11410976, upload-time = "2026-03-12T23:05:39.955Z" },
+ { url = "https://files.pythonhosted.org/packages/6c/25/3fc9114abf979a41673ce877c08016f8e660ad6cf508c3957f537d2e9fa9/ruff-0.15.6-py3-none-win32.whl", hash = "sha256:bbf67d39832404812a2d23020dda68fee7f18ce15654e96fb1d3ad21a5fe436c", size = 10616872, upload-time = "2026-03-12T23:05:42.451Z" },
+ { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
+ { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
+]
+
[[package]]
name = "selenium"
version = "4.41.0"
@@ -1803,21 +1796,19 @@ wheels = [
[[package]]
name = "tornado"
-version = "6.5.4"
+version = "6.5.5"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/37/1d/0a336abf618272d53f62ebe274f712e213f5a03c0b2339575430b8362ef2/tornado-6.5.4.tar.gz", hash = "sha256:a22fa9047405d03260b483980635f0b041989d8bcc9a313f8fe18b411d84b1d7", size = 513632, upload-time = "2025-12-15T19:21:03.836Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/f1/3173dfa4a18db4a9b03e5d55325559dab51ee653763bb8745a75af491286/tornado-6.5.5.tar.gz", hash = "sha256:192b8f3ea91bd7f1f50c06955416ed76c6b72f96779b962f07f911b91e8d30e9", size = 516006, upload-time = "2026-03-10T21:31:02.067Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ab/a9/e94a9d5224107d7ce3cc1fab8d5dc97f5ea351ccc6322ee4fb661da94e35/tornado-6.5.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d6241c1a16b1c9e4cc28148b1cda97dd1c6cb4fb7068ac1bedc610768dff0ba9", size = 443909, upload-time = "2025-12-15T19:20:48.382Z" },
- { url = "https://files.pythonhosted.org/packages/db/7e/f7b8d8c4453f305a51f80dbb49014257bb7d28ccb4bbb8dd328ea995ecad/tornado-6.5.4-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2d50f63dda1d2cac3ae1fa23d254e16b5e38153758470e9956cbc3d813d40843", size = 442163, upload-time = "2025-12-15T19:20:49.791Z" },
- { url = "https://files.pythonhosted.org/packages/ba/b5/206f82d51e1bfa940ba366a8d2f83904b15942c45a78dd978b599870ab44/tornado-6.5.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1cf66105dc6acb5af613c054955b8137e34a03698aa53272dbda4afe252be17", size = 445746, upload-time = "2025-12-15T19:20:51.491Z" },
- { url = "https://files.pythonhosted.org/packages/8e/9d/1a3338e0bd30ada6ad4356c13a0a6c35fbc859063fa7eddb309183364ac1/tornado-6.5.4-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50ff0a58b0dc97939d29da29cd624da010e7f804746621c78d14b80238669335", size = 445083, upload-time = "2025-12-15T19:20:52.778Z" },
- { url = "https://files.pythonhosted.org/packages/50/d4/e51d52047e7eb9a582da59f32125d17c0482d065afd5d3bc435ff2120dc5/tornado-6.5.4-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5fb5e04efa54cf0baabdd10061eb4148e0be137166146fff835745f59ab9f7f", size = 445315, upload-time = "2025-12-15T19:20:53.996Z" },
- { url = "https://files.pythonhosted.org/packages/27/07/2273972f69ca63dbc139694a3fc4684edec3ea3f9efabf77ed32483b875c/tornado-6.5.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9c86b1643b33a4cd415f8d0fe53045f913bf07b4a3ef646b735a6a86047dda84", size = 446003, upload-time = "2025-12-15T19:20:56.101Z" },
- { url = "https://files.pythonhosted.org/packages/d1/83/41c52e47502bf7260044413b6770d1a48dda2f0246f95ee1384a3cd9c44a/tornado-6.5.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:6eb82872335a53dd063a4f10917b3efd28270b56a33db69009606a0312660a6f", size = 445412, upload-time = "2025-12-15T19:20:57.398Z" },
- { url = "https://files.pythonhosted.org/packages/10/c7/bc96917f06cbee182d44735d4ecde9c432e25b84f4c2086143013e7b9e52/tornado-6.5.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6076d5dda368c9328ff41ab5d9dd3608e695e8225d1cd0fd1e006f05da3635a8", size = 445392, upload-time = "2025-12-15T19:20:58.692Z" },
- { url = "https://files.pythonhosted.org/packages/0c/1a/d7592328d037d36f2d2462f4bc1fbb383eec9278bc786c1b111cbbd44cfa/tornado-6.5.4-cp39-abi3-win32.whl", hash = "sha256:1768110f2411d5cd281bac0a090f707223ce77fd110424361092859e089b38d1", size = 446481, upload-time = "2025-12-15T19:21:00.008Z" },
- { url = "https://files.pythonhosted.org/packages/d6/6d/c69be695a0a64fd37a97db12355a035a6d90f79067a3cf936ec2b1dc38cd/tornado-6.5.4-cp39-abi3-win_amd64.whl", hash = "sha256:fa07d31e0cd85c60713f2b995da613588aa03e1303d75705dca6af8babc18ddc", size = 446886, upload-time = "2025-12-15T19:21:01.287Z" },
- { url = "https://files.pythonhosted.org/packages/50/49/8dc3fd90902f70084bd2cd059d576ddb4f8bb44c2c7c0e33a11422acb17e/tornado-6.5.4-cp39-abi3-win_arm64.whl", hash = "sha256:053e6e16701eb6cbe641f308f4c1a9541f91b6261991160391bfc342e8a551a1", size = 445910, upload-time = "2025-12-15T19:21:02.571Z" },
+ { url = "https://files.pythonhosted.org/packages/59/8c/77f5097695f4dd8255ecbd08b2a1ed8ba8b953d337804dd7080f199e12bf/tornado-6.5.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:487dc9cc380e29f58c7ab88f9e27cdeef04b2140862e5076a66fb6bb68bb1bfa", size = 445983, upload-time = "2026-03-10T21:30:44.28Z" },
+ { url = "https://files.pythonhosted.org/packages/ab/5e/7625b76cd10f98f1516c36ce0346de62061156352353ef2da44e5c21523c/tornado-6.5.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:65a7f1d46d4bb41df1ac99f5fcb685fb25c7e61613742d5108b010975a9a6521", size = 444246, upload-time = "2026-03-10T21:30:46.571Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/04/7b5705d5b3c0fab088f434f9c83edac1573830ca49ccf29fb83bf7178eec/tornado-6.5.5-cp39-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e74c92e8e65086b338fd56333fb9a68b9f6f2fe7ad532645a290a464bcf46be5", size = 447229, upload-time = "2026-03-10T21:30:48.273Z" },
+ { url = "https://files.pythonhosted.org/packages/34/01/74e034a30ef59afb4097ef8659515e96a39d910b712a89af76f5e4e1f93c/tornado-6.5.5-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:435319e9e340276428bbdb4e7fa732c2d399386d1de5686cb331ec8eee754f07", size = 448192, upload-time = "2026-03-10T21:30:51.22Z" },
+ { url = "https://files.pythonhosted.org/packages/be/00/fe9e02c5a96429fce1a1d15a517f5d8444f9c412e0bb9eadfbe3b0fc55bf/tornado-6.5.5-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3f54aa540bdbfee7b9eb268ead60e7d199de5021facd276819c193c0fb28ea4e", size = 448039, upload-time = "2026-03-10T21:30:53.52Z" },
+ { url = "https://files.pythonhosted.org/packages/82/9e/656ee4cec0398b1d18d0f1eb6372c41c6b889722641d84948351ae19556d/tornado-6.5.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:36abed1754faeb80fbd6e64db2758091e1320f6bba74a4cf8c09cd18ccce8aca", size = 447445, upload-time = "2026-03-10T21:30:55.541Z" },
+ { url = "https://files.pythonhosted.org/packages/5a/76/4921c00511f88af86a33de770d64141170f1cfd9c00311aea689949e274e/tornado-6.5.5-cp39-abi3-win32.whl", hash = "sha256:dd3eafaaeec1c7f2f8fdcd5f964e8907ad788fe8a5a32c4426fbbdda621223b7", size = 448582, upload-time = "2026-03-10T21:30:57.142Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/23/f6c6112a04d28eed765e374435fb1a9198f73e1ec4b4024184f21faeb1ad/tornado-6.5.5-cp39-abi3-win_amd64.whl", hash = "sha256:6443a794ba961a9f619b1ae926a2e900ac20c34483eea67be4ed8f1e58d3ef7b", size = 448990, upload-time = "2026-03-10T21:30:58.857Z" },
+ { url = "https://files.pythonhosted.org/packages/b7/c8/876602cbc96469911f0939f703453c1157b0c826ecb05bdd32e023397d4e/tornado-6.5.5-cp39-abi3-win_arm64.whl", hash = "sha256:2c9a876e094109333f888539ddb2de4361743e5d21eece20688e3e351e4990a6", size = 448016, upload-time = "2026-03-10T21:31:00.43Z" },
]
[[package]]
@@ -1898,15 +1889,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
]
-[[package]]
-name = "tzdata"
-version = "2025.3"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" },
-]
-
[[package]]
name = "undetected-chromedriver"
version = "3.5.5"
@@ -1932,6 +1914,22 @@ socks = [
{ name = "pysocks" },
]
+[[package]]
+name = "virtualenv"
+version = "21.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "distlib" },
+ { name = "filelock" },
+ { name = "platformdirs" },
+ { name = "python-discovery" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/aa/92/58199fe10049f9703c2666e809c4f686c54ef0a68b0f6afccf518c0b1eb9/virtualenv-21.2.0.tar.gz", hash = "sha256:1720dc3a62ef5b443092e3f499228599045d7fea4c79199770499df8becf9098", size = 5840618, upload-time = "2026-03-09T17:24:38.013Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl", hash = "sha256:1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f", size = 5825084, upload-time = "2026-03-09T17:24:35.378Z" },
+]
+
[[package]]
name = "wcwidth"
version = "0.6.0"
@@ -1943,7 +1941,7 @@ wheels = [
[[package]]
name = "websearcher"
-version = "0.6.9"
+version = "0.7.0"
source = { editable = "." }
dependencies = [
{ name = "beautifulsoup4" },
@@ -1961,11 +1959,12 @@ dependencies = [
[package.dev-dependencies]
dev = [
{ name = "ipykernel" },
- { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
- { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "polars" },
+ { name = "pre-commit" },
{ name = "pyarrow" },
{ name = "pytest" },
+ { name = "pytest-cov" },
+ { name = "ruff" },
{ name = "setuptools" },
{ name = "syrupy" },
{ name = "tabulate" },
@@ -1989,10 +1988,12 @@ requires-dist = [
[package.metadata.requires-dev]
dev = [
{ name = "ipykernel", specifier = ">=6.29.5" },
- { name = "pandas", specifier = ">=2.2.3" },
{ name = "polars", specifier = ">=1.37.1" },
+ { name = "pre-commit", specifier = ">=4.5.1" },
{ name = "pyarrow", specifier = ">=23.0.0" },
{ name = "pytest", specifier = ">=8.3.4" },
+ { name = "pytest-cov", specifier = ">=7.0.0" },
+ { name = "ruff", specifier = ">=0.15.6" },
{ name = "setuptools", specifier = ">=80.9.0" },
{ name = "syrupy", specifier = ">=4.8.1" },
{ name = "tabulate", specifier = ">=0.9.0" },