Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/deploy-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ jobs:
# - name: Validate notebook structure
# run: python docs-site/scripts/validate-notebooks.py

- name: Validate notebook patterns
run: python docs-site/scripts/validate-notebook-patterns.py

- name: Convert notebooks to HTML
run: uv run --group case-studies python docs-site/scripts/convert-notebooks.py

Expand Down
214 changes: 214 additions & 0 deletions docs-site/scripts/validate-notebook-patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""Validate that case study notebooks follow required patterns.

Notebooks that call any everyrow operation (merge, agent_map, screen, rank,
dedupe) must:
1. Conditionally install everyrow (try/except ImportError + pip install)
2. Conditionally set EVERYROW_API_KEY (check os.environ before setting)
3. Wrap all tool calls inside `async with create_session(name="...") as session:`
blocks, with `session.get_url()` printed for observability.

Notebooks that don't use any everyrow ops are skipped.
"""

import json
import re
import sys
from pathlib import Path

# Resolve repository-relative paths from this script's own location so the
# validator works regardless of the current working directory.
SCRIPT_DIR = Path(__file__).resolve().parent
DOCS_SITE_DIR = SCRIPT_DIR.parent
REPO_ROOT = DOCS_SITE_DIR.parent
NOTEBOOKS_DIR = REPO_ROOT / "docs" / "case_studies"

# everyrow operations that must be wrapped in create_session
EVERYROW_OPS = {"merge", "agent_map", "screen", "rank", "dedupe"}

# Pattern: function call like `await merge(`, `await screen(`, etc.
# Also matches direct calls without await, and _async variants.
# NOTE(review): `\b` anchors the start of the name, so method calls such as
# `df.merge(` still match — an accepted false positive for a lint heuristic.
# The ops are sorted so the compiled pattern is deterministic across runs
# (set iteration order varies under hash randomization).
OP_CALL_RE = re.compile(
    r"\b(?:await\s+)?(?:" + "|".join(sorted(EVERYROW_OPS)) + r")(?:_async)?\s*\("
)


def get_code_cells(notebook_path: Path) -> list[str]:
    """Return the source text of every code cell in *notebook_path*.

    The .ipynb format stores a cell's source either as a list of line
    strings or as a single string; both forms are normalized to one
    string per cell. Non-code cells (markdown, raw) are ignored.
    """
    nb = json.loads(notebook_path.read_text())
    sources: list[str] = []
    for cell in nb.get("cells", []):
        if cell.get("cell_type") != "code":
            continue
        raw = cell.get("source", [])
        sources.append(raw if isinstance(raw, str) else "".join(raw))
    return sources


def check_conditional_pip_install(code_cells: list[str]) -> list[str]:
    """Check for conditional pip install of everyrow.

    Accepted patterns:
        try:
            import everyrow
        except ImportError:
            %pip install everyrow   (or !pip install everyrow)

    Returns a list of error messages; empty when the pattern is present.
    """
    joined = "\n".join(code_cells)

    pip_install_found = re.search(r"[%!]pip install\b.*\beveryrow\b", joined) is not None
    guarded_import_found = (
        re.search(
            r"try\s*:.*?import\s+everyrow.*?except\s+(?:Import|Module)(?:Error|NotFoundError)",
            joined,
            re.DOTALL,
        )
        is not None
    )

    if not pip_install_found:
        return [
            "Missing `%pip install everyrow`. "
            "Add a setup cell with: try/except ImportError -> %pip install everyrow"
        ]
    if not guarded_import_found:
        return [
            "pip install everyrow is not conditional. "
            "Wrap it in: try: import everyrow / except ImportError: %pip install everyrow"
        ]
    return []


def check_conditional_api_key(code_cells: list[str]) -> list[str]:
    """Check for conditional EVERYROW_API_KEY setup.

    Accepted pattern:
        if "EVERYROW_API_KEY" not in os.environ:
            os.environ["EVERYROW_API_KEY"] = "..."

    Returns a list of error messages; empty when the pattern is present.
    """
    joined = "\n".join(code_cells)

    if "EVERYROW_API_KEY" not in joined:
        return [
            "Missing EVERYROW_API_KEY setup. "
            'Add: if "EVERYROW_API_KEY" not in os.environ: os.environ["EVERYROW_API_KEY"] = "..."'
        ]

    conditional = re.search(
        r'if\s+["\']EVERYROW_API_KEY["\']\s+not\s+in\s+os\.environ',
        joined,
    )
    if conditional is None:
        return [
            "EVERYROW_API_KEY is not set conditionally. "
            'Use: if "EVERYROW_API_KEY" not in os.environ: os.environ["EVERYROW_API_KEY"] = "..."'
        ]
    return []


def check_create_session_wrapping(code_cells: list[str]) -> list[str]:
    """Check that everyrow tool calls are wrapped in create_session.

    Requirements:
    - If any everyrow op is called, `create_session(name=` must appear in the notebook
    - `session.get_url()` or `task_id` must be printed for observability

    Returns a list of error messages; empty when compliant, or when the
    notebook uses no everyrow ops at all.
    """
    errors = []
    all_code = "\n".join(code_cells)

    # `search` is sufficient: we only need to know whether any op is called,
    # so materializing every match with `findall` was wasted work.
    if not OP_CALL_RE.search(all_code):
        return []  # No everyrow ops used, nothing to check

    # Check that create_session is used with a name
    if not re.search(r"create_session\s*\(\s*name\s*=", all_code):
        errors.append(
            "everyrow operations found but not wrapped in "
            '`async with create_session(name="...") as session:`. '
            "All tool calls must run inside a named session."
        )

    # Check for observability: session.get_url() or task_id/session_id referenced
    if not re.search(r"session\.get_url\(\)|\.task_id|\.session_id", all_code):
        errors.append(
            "Missing session observability. "
            "Add `print(f\"Session URL: {session.get_url()}\")` inside the create_session block."
        )

    return errors


def uses_everyrow_ops(code_cells: list[str]) -> bool:
    """Return True when any everyrow operation is called in the notebook.

    Cells are joined before matching so the result agrees with the other
    whole-notebook checks (a match may span a cell boundary).
    """
    joined = "\n".join(code_cells)
    return OP_CALL_RE.search(joined) is not None


def validate_notebook(notebook_path: Path) -> list[str]:
    """Validate a notebook's patterns. Returns list of error messages.

    The notebook's parent directory name is used as its slug in messages.
    A notebook that is not valid JSON is reported as a validation error
    instead of crashing the entire run with a traceback.
    """
    slug = notebook_path.parent.name

    try:
        code_cells = get_code_cells(notebook_path)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        return [f"{slug}: Could not parse notebook: {exc}"]

    if not code_cells:
        return [f"{slug}: No code cells found"]

    # Only enforce setup and session checks if notebook actually calls everyrow ops
    if not uses_everyrow_ops(code_cells):
        return []

    checks = [
        check_conditional_pip_install,
        check_conditional_api_key,
        check_create_session_wrapping,
    ]
    return [f"{slug}: {error}" for check_fn in checks for error in check_fn(code_cells)]


def main() -> int:
    """Run pattern validation over every case-study notebook.

    Returns 0 when all notebooks pass, 1 when any notebook fails or
    when no notebooks are found at all (a likely path misconfiguration).
    """
    notebooks = sorted(NOTEBOOKS_DIR.glob("*/notebook.ipynb"))
    if not notebooks:
        print(f"No notebooks found in {NOTEBOOKS_DIR}")
        return 1

    per_notebook = [validate_notebook(nb) for nb in notebooks]
    all_errors = [err for errs in per_notebook for err in errs]
    passed = sum(1 for errs in per_notebook if not errs)

    if all_errors:
        print("Notebook pattern validation failed:\n")
        for err in all_errors:
            print(f" - {err}")
        print(f"\n{len(all_errors)} error(s) across {len(notebooks)} notebooks")
        print(f"{passed}/{len(notebooks)} notebooks passed all checks")
        return 1

    print(f"All {len(notebooks)} notebooks pass pattern checks")
    return 0


# Script entry point: propagate the validator's result as the process
# exit code so CI fails the job when any notebook violates the patterns.
if __name__ == "__main__":
    sys.exit(main())
57 changes: 24 additions & 33 deletions docs/case_studies/dedupe-crm-company-records/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,24 @@
{
"cell_type": "markdown",
"metadata": {},
"source": "# How to use LLMs to deduplicate CRM Data\n\nThis notebook demonstrates how to use the everyrow SDK's `dedupe` operation to deduplicate messy CRM data using AI-powered semantic matching."
},
{
"cell_type": "code",
"execution_count": null,
"id": "setup-5c9f444a",
"metadata": {},
"outputs": [],
"source": [
"# How to use LLMs to deduplicate CRM Data\n",
"# Setup: install everyrow if needed and configure API key\n",
"try:\n",
" import everyrow\n",
"except ImportError:\n",
" %pip install everyrow\n",
"\n",
"This notebook demonstrates how to use the everyrow SDK's `dedupe` operation to deduplicate messy CRM data using AI-powered semantic matching."
"import os\n",
"if \"EVERYROW_API_KEY\" not in os.environ:\n",
" os.environ[\"EVERYROW_API_KEY\"] = \"your-api-key-here\" # Get one at everyrow.io\n"
]
},
{
Comment on lines 20 to 26
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The call to await client.__aexit__() is incorrect as it's missing the three required arguments for an async context manager, which will cause a TypeError on exit.
Severity: HIGH

Suggested Fix

The __aexit__ method should be called with the standard three None arguments to signify a clean exit from the context manager. Change await client.__aexit__() to await client.__aexit__(None, None, None) in src/everyrow/session.py.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: docs/case_studies/dedupe-crm-company-records/notebook.ipynb#L20-L26

Potential issue: The `__aexit__` method in `src/everyrow/session.py` is called without
any arguments. The async context manager protocol requires `__aexit__` to be called with
three arguments: `exc_type`, `exc_val`, and `exc_tb`. The `AuthenticatedClient` forwards
this call to the underlying `httpx.AsyncClient`, whose `__aexit__` method will raise a
`TypeError` because it's missing the required positional arguments. This will cause an
unhandled exception whenever a session is created and closed using the `create_session`
context manager, such as when `single_agent()` is called without an explicit session.

Did we get this right? 👍 / 👎 to inform future reviews.

Expand All @@ -23,19 +37,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install everyrow\n",
"from datetime import datetime\n",
"from textwrap import dedent\n",
"\n",
"import pandas as pd\n",
"# load API key from environment/.env file or set it directly in the notebook\n",
"from dotenv import load_dotenv\n",
"load_dotenv()\n",
"# import os\n",
"# os.environ[\"EVERYROW_API_KEY\"] = \"get an API key on everyrow.io. $20 free\"\n",
"from everyrow.ops import dedupe\n"
]
"source": "from datetime import datetime\nfrom textwrap import dedent\n\nimport pandas as pd\nfrom dotenv import load_dotenv\n\nfrom everyrow import create_session\nfrom everyrow.ops import dedupe\n\nload_dotenv()"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -204,7 +206,7 @@
}
],
"source": [
"data = pd.read_csv(\"https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/main/docs/data/case_01_crm_data.csv\", engine=\"pyarrow\")\n",
"data = pd.read_csv(\"../data/case_01_crm_data.csv\", engine=\"pyarrow\")\n",
"\n",
"print(f\"Total records: {len(data)}\")\n",
"data.sort_values(by=\"company_name\").head(15)"
Expand Down Expand Up @@ -235,18 +237,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"equivalence_relation = dedent(\"\"\"\n",
" Two entries are duplicates if they include data for the same legal entity.\n",
"\"\"\")\n",
"\n",
"print(\"Deduplicating CRM data...\\n\")\n",
"\n",
"result = await dedupe(\n",
" input=data,\n",
" equivalence_relation=equivalence_relation,\n",
")"
]
"source": "equivalence_relation = dedent(\"\"\"\n Two entries are duplicates if they include data for the same legal entity.\n\"\"\")\n\nprint(\"Deduplicating CRM data...\\n\")\n\nasync with create_session(name=\"CRM Deduplication\") as session:\n print(f\"Session URL: {session.get_url()}\")\n result = await dedupe(\n session=session,\n input=data,\n equivalence_relation=equivalence_relation,\n )"
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -611,9 +602,6 @@
}
],
"metadata": {
"everyrow": {
"description": "Python notebook cleaning 500 CRM records with inconsistent company names, missing contacts, and partial email matches. Uses everyrow's dedupe() with a plain-English equivalence relation to find and group semantic duplicates."
},
"kernelspec": {
"display_name": ".venv",
"language": "python",
Expand All @@ -631,8 +619,11 @@
"pygments_lexer": "ipython3",
"version": "3.12.11"
},
"language_version": "3.12"
"language_version": "3.12",
"everyrow": {
"description": "Python notebook cleaning 500 CRM records with inconsistent company names, missing contacts, and partial email matches. Uses everyrow's dedupe() with a plain-English equivalence relation to find and group semantic duplicates."
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,24 @@
"`MEDIUM` and `HIGH` run full research agents that search, read, and cross-reference sources. For these, model selection matters a lot — and we choose models based on their position on the **Pareto frontier** of accuracy, cost, and speed. This notebook shows how."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "setup-8e478d18",
"metadata": {},
"outputs": [],
"source": [
"# Setup: install everyrow if needed and configure API key\n",
"try:\n",
" import everyrow\n",
"except ImportError:\n",
" %pip install everyrow\n",
"\n",
"import os\n",
"if \"EVERYROW_API_KEY\" not in os.environ:\n",
" os.environ[\"EVERYROW_API_KEY\"] = \"your-api-key-here\" # Get one at everyrow.io\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -667,7 +685,7 @@
{
"cell_type": "markdown",
"metadata": {},
"source": "## 6. Choosing the right effort level\n\n**`LOW`** is the default, and it's the right choice for most tasks that don't require web research — classifying rows, extracting fields, reformatting data. It runs a single LLM call with no tool use, so it's fast and cheap. Because DRB measures agentic information retrieval, the DRB score for the `LOW` model isn't very meaningful here: in practice `LOW` doesn't do research at all.\n\n**`MEDIUM`** turns on the research agent. Gemini 3 Flash (low) sits on the cost Pareto frontier — it's the cheapest model that delivers strong research accuracy. Use this when you need agents to look things up on the web but want to keep costs down.\n\n**`HIGH`** uses Claude 4.6 Opus (low), which sits on both the cost and speed Pareto frontiers. It's the fastest high-accuracy model on DRB and delivers the best score-per-dollar among top-tier models. Use this when accuracy matters and you're willing to pay more per row.\n\n**Want the absolute best accuracy?** You can override the model directly by setting `effort_level=None` and specifying all parameters explicitly:\n\n```python\nfrom everyrow.ops import agent_map\nfrom everyrow.task import LLM\n\nresult = await agent_map(\n task=\"Find each company's latest funding round\",\n input=companies_df,\n effort_level=None,\n llm=LLM.CLAUDE_4_6_OPUS_HIGH,\n iteration_budget=10,\n include_reasoning=True,\n)\n```\n\nClaude 4.6 Opus (high) is the top-scoring model on DRB, but it costs roughly twice as much and takes about three times as long as the `HIGH` default. For most workloads the `HIGH` preset already captures the bulk of that accuracy at a fraction of the price — but the option is there when you need it.\n\nWe re-run these benchmarks as new models launch, so the model behind each effort level may change over time. You always get the current best trade-off without changing your code."
"source": "## 6. Choosing the right effort level\n\n**`LOW`** is the default, and it's the right choice for most tasks that don't require web research — classifying rows, extracting fields, reformatting data. It runs a single LLM call with no tool use, so it's fast and cheap. Because DRB measures agentic information retrieval, the DRB score for the `LOW` model isn't very meaningful here: in practice `LOW` doesn't do research at all.\n\n**`MEDIUM`** turns on the research agent. Gemini 3 Flash (low) sits on the cost Pareto frontier — it's the cheapest model that delivers strong research accuracy. Use this when you need agents to look things up on the web but want to keep costs down.\n\n**`HIGH`** uses Claude 4.6 Opus (low), which sits on both the cost and speed Pareto frontiers. It's the fastest high-accuracy model on DRB and delivers the best score-per-dollar among top-tier models. Use this when accuracy matters and you're willing to pay more per row.\n\n**Want the absolute best accuracy?** You can override the model directly by setting `effort_level=None` and specifying all parameters explicitly:\n\n```python\nfrom everyrow.ops import agent_map\nfrom everyrow.task import LLM\n\nresult = await agent_map(\n task=\"Find each company's latest funding round\",\n input=companies_df,\n effort_level=None,\n llm=LLM.CLAUDE_4_6_OPUS_HIGH,\n iteration_budget=10,\n include_research=True,\n)\n```\n\nClaude 4.6 Opus (high) is the top-scoring model on DRB, but it costs roughly twice as much and takes about three times as long as the `HIGH` default. For most workloads the `HIGH` preset already captures the bulk of that accuracy at a fraction of the price — but the option is there when you need it.\n\nWe re-run these benchmarks as new models launch, so the model behind each effort level may change over time. You always get the current best trade-off without changing your code."
}
],
"metadata": {
Expand All @@ -691,4 +709,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Loading