diff --git a/docs-site/src/app/globals.css b/docs-site/src/app/globals.css
index 5d67b491..143df9fe 100644
--- a/docs-site/src/app/globals.css
+++ b/docs-site/src/app/globals.css
@@ -94,6 +94,12 @@ a:hover {
   letter-spacing: 0.05em;
   color: var(--muted);
   margin-bottom: 0.5rem;
+  text-decoration: none;
+  display: block;
+}
+
+a.docs-sidebar-section-title:hover {
+  color: var(--foreground);
 }
 
 .docs-sidebar-nav {
diff --git a/docs-site/src/app/page.tsx b/docs-site/src/app/page.tsx
index 0d62a621..8854057f 100644
--- a/docs-site/src/app/page.tsx
+++ b/docs-site/src/app/page.tsx
@@ -33,6 +33,12 @@ const SECTION_DESCRIPTIONS: Record<string, string> = {
   "Case Studies": "Real-world examples with Jupyter notebooks",
 };
 
+const SECTION_LINKS: Record<string, string> = {
+  "API Reference": "/api",
+  Guides: "/guides",
+  "Case Studies": "/notebooks",
+};
+
 const SECTION_DISPLAY_TITLES: Record<string, string> = {
   Overview: "Getting Started",
 };
@@ -44,8 +50,10 @@ function SectionCard({ section }: { section: NavSection }) {
 
   if (!firstItem) return null;
 
+  const href = SECTION_LINKS[section.title] || `/${firstItem.slug}`;
+
   return (
-    <Link href={`/${firstItem.slug}`} className="docs-card">
+    <Link href={href} className="docs-card">
{icon === "rocket" && ( ({ url: `${baseUrl}/${slug}`, lastModified: new Date(), changeFrequency: "weekly" as const, - priority: 0.8, + priority: hubSlugs.has(slug) ? 0.9 : 0.8, })); const notebookPages = notebookSlugs.map((slug) => ({ diff --git a/docs-site/src/components/Sidebar.tsx b/docs-site/src/components/Sidebar.tsx index 9c40adf0..facd8243 100644 --- a/docs-site/src/components/Sidebar.tsx +++ b/docs-site/src/components/Sidebar.tsx @@ -26,19 +26,32 @@ export function Sidebar({ navigation, isOpen, onClose }: SidebarProps) { {navigation.map((section) => (
-
{section.title}
+ {section.href ? ( + + {section.title} + + ) : ( +
{section.title}
+ )}
    {section.items.map((item) => { const isActive = currentSlug === item.slug; + const isExternal = item.href?.startsWith("http"); return (
  • - - {item.title} - + {isExternal ? ( + + {item.title} + + ) : ( + + {item.title} + + )}
  • ); })}
diff --git a/docs-site/src/utils/docs.ts b/docs-site/src/utils/docs.ts
index 25602417..ae442555 100644
--- a/docs-site/src/utils/docs.ts
+++ b/docs-site/src/utils/docs.ts
@@ -100,7 +100,8 @@ export function getDocSlugs(): string[] {
 // Navigation structure
 export interface NavSection {
   title: string;
-  items: { slug: string; title: string }[];
+  href?: string;
+  items: { slug: string; title: string; href?: string }[];
 }
 
 export function getNavigation(): NavSection[] {
@@ -118,10 +119,12 @@
       { slug: "chaining-operations", title: "Chaining Operations" },
       { slug: "installation", title: "Installation" },
       { slug: "skills-vs-mcp", title: "Skills vs MCP" },
+      { slug: "api-key", title: "API Key", href: "https://everyrow.io/api-key" },
     ],
   },
   {
     title: "API Reference",
+    href: "/api",
     items: reference.map((d) => ({
       slug: d.slug,
       title: d.title.replace(/^reference\//, ""),
@@ -129,12 +132,14 @@
   },
   {
     title: "Guides",
+    href: "/guides",
     items: guides
-      .filter((d) => !["getting-started", "chaining-operations", "installation", "skills-vs-mcp"].includes(d.slug))
+      .filter((d) => !["getting-started", "chaining-operations", "installation", "skills-vs-mcp", "guides", "notebooks", "api"].includes(d.slug))
       .map((d) => ({ slug: d.slug, title: d.title })),
   },
   {
     title: "Case Studies",
+    href: "/notebooks",
     items: notebooks.map((n) => ({
       slug: `notebooks/${n.slug}`,
       title: n.title,
diff --git a/docs/api.md b/docs/api.md
new file mode 100644
index 00000000..3181c6ae
--- /dev/null
+++ b/docs/api.md
@@ -0,0 +1,68 @@
+---
+title: API Reference
+description: Complete API reference for everyrow — screen, rank, dedupe, merge, and research operations powered by LLM web research agents.
+---
+
+# API Reference
+
+Five operations for processing data with LLM-powered web research agents. Each takes a DataFrame and a natural-language instruction.
+ +## screen + +```python +result = await screen(task=..., input=df, response_model=Model) +``` + +`screen` takes a DataFrame and a natural-language filter predicate, evaluates each row using web research agents, and returns only the rows that pass. The filter condition does not need to be computable from existing columns. Agents can research external information to make the determination. + +[Full reference →](/reference/SCREEN) +Guides: [Filter a DataFrame with LLMs](/filter-dataframe-with-llm) +Notebooks: [LLM Screening at Scale](/notebooks/llm-powered-screening-at-scale), [Screen Stocks by Investment Thesis](/notebooks/screen-stocks-by-investment-thesis) + +## rank + +```python +result = await rank(task=..., input=df, field_name="score") +``` + +`rank` takes a DataFrame and a natural-language scoring criterion, dispatches web research agents to compute a score for each row, and returns the DataFrame sorted by that score. The sort key does not need to exist in your data. Agents derive it at runtime by searching the web, reading pages, and reasoning over what they find. + +[Full reference →](/reference/RANK) +Guides: [Sort a Dataset Using Web Data](/rank-by-external-metric) +Notebooks: [Score Leads from Fragmented Data](/notebooks/score-leads-from-fragmented-data), [Score Leads Without CRM History](/notebooks/score-leads-without-crm-history) + +## dedupe + +```python +result = await dedupe(input=df, equivalence_relation="...") +``` + +`dedupe` groups duplicate rows in a DataFrame based on a natural-language equivalence relation, assigns cluster IDs, and selects a canonical row per cluster. The duplicate criterion is semantic and LLM-powered: agents reason over the data and, when needed, search the web for external information to establish equivalence. This handles abbreviations, name variations, job changes, and entity relationships that no string similarity threshold can capture. 
+ +[Full reference →](/reference/DEDUPE) +Guides: [Remove Duplicates from ML Training Data](/deduplicate-training-data-ml), [Resolve Duplicate Entities](/resolve-entities-python) +Notebooks: [Dedupe CRM Company Records](/notebooks/dedupe-crm-company-records) + +## merge + +```python +result = await merge(task=..., left_table=df1, right_table=df2) +``` + +`merge` left-joins two DataFrames using LLM-powered agents to resolve the key mapping instead of requiring exact or fuzzy key matches. Agents resolve semantic relationships by reasoning over the data and, when needed, searching the web for external information to establish matches: subsidiaries, regional names, abbreviations, and product-to-parent-company mappings. + +[Full reference →](/reference/MERGE) +Guides: [Fuzzy Join Without Matching Keys](/fuzzy-join-without-keys) +Notebooks: [LLM Merging at Scale](/notebooks/llm-powered-merging-at-scale), [Match Software Vendors to Requirements](/notebooks/match-software-vendors-to-requirements) + +## agent_map / single_agent + +```python +result = await agent_map(task=..., input=df) +``` + +`single_agent` runs one web research agent on a single input (or no input). `agent_map` runs an agent on every row of a DataFrame in parallel. Both dispatch agents that search the web, read pages, and return structured results. The transform is live web research: agents fetch and synthesize external information to populate new columns. 
+ +[Full reference →](/reference/RESEARCH) +Guides: [Add a Column with Web Lookup](/add-column-web-lookup), [Classify and Label Data with an LLM](/classify-dataframe-rows-llm) +Notebooks: [LLM Web Research Agents at Scale](/notebooks/llm-web-research-agents-at-scale), [Agent Map Regulatory Status](/notebooks/agent-map-regulatory-status) diff --git a/docs/guides.md b/docs/guides.md new file mode 100644 index 00000000..3dd93483 --- /dev/null +++ b/docs/guides.md @@ -0,0 +1,30 @@ +--- +title: Guides +description: Step-by-step tutorials for using everyrow to screen, rank, dedupe, merge, and research data with LLM-powered agents. +--- + +# Guides + +Practical walkthroughs that show you how to use everyrow for common data processing tasks. Each guide covers a single operation end-to-end with working code. + +## Screen + +- [Filter a DataFrame with LLMs](/filter-dataframe-with-llm) + +## Rank + +- [Sort a Dataset Using Web Data](/rank-by-external-metric) + +## Dedupe + +- [Remove Duplicates from ML Training Data](/deduplicate-training-data-ml) +- [Resolve Duplicate Entities](/resolve-entities-python) + +## Merge + +- [Fuzzy Join Without Matching Keys](/fuzzy-join-without-keys) + +## Research + +- [Add a Column with Web Lookup](/add-column-web-lookup) +- [Classify and Label Data with an LLM](/classify-dataframe-rows-llm) diff --git a/docs/notebooks.md b/docs/notebooks.md new file mode 100644 index 00000000..f471d387 --- /dev/null +++ b/docs/notebooks.md @@ -0,0 +1,41 @@ +--- +title: Notebooks +description: Runnable notebooks demonstrating everyrow operations on real datasets — screen, rank, dedupe, merge, and research with LLM-powered agents. +--- + +# Notebooks + +Runnable notebooks with real datasets. Each notebook demonstrates an everyrow operation end-to-end with output you can inspect. 
+ +## Screen + +- [LLM-Powered Screening at Scale](/notebooks/llm-powered-screening-at-scale) +- [Screen Stocks by Investment Thesis](/notebooks/screen-stocks-by-investment-thesis) +- [Screen Stocks by Margin Sensitivity](/notebooks/screen-stocks-by-margin-sensitivity) +- [Screen Job Postings by Criteria](/notebooks/screen-job-postings-by-criteria) + +## Rank + +- [Score Leads from Fragmented Data](/notebooks/score-leads-from-fragmented-data) +- [Score Leads Without CRM History](/notebooks/score-leads-without-crm-history) +- [Research and Rank Permit Times](/notebooks/research-and-rank-permit-times) + +## Dedupe + +- [Dedupe CRM Company Records](/notebooks/dedupe-crm-company-records) + +## Merge + +- [LLM-Powered Merging at Scale](/notebooks/llm-powered-merging-at-scale) +- [Match Software Vendors to Requirements](/notebooks/match-software-vendors-to-requirements) +- [Merge Contacts with Company Data](/notebooks/merge-contacts-with-company-data) +- [Merge Overlapping Contact Lists](/notebooks/merge-overlapping-contact-lists) + +## Research + +- [LLM Web Research Agents at Scale](/notebooks/llm-web-research-agents-at-scale) +- [Agent Map Regulatory Status](/notebooks/agent-map-regulatory-status) + +## Multi-Method + +- [Multi-Stage Lead Qualification](/notebooks/multi-stage-lead-qualification) diff --git a/docs/reference/DEDUPE.md b/docs/reference/DEDUPE.md index 7994130d..0b201459 100644 --- a/docs/reference/DEDUPE.md +++ b/docs/reference/DEDUPE.md @@ -136,7 +136,15 @@ Output (selected rows only): | 500 | ~2 min | ~$1.67 | | 2,000 | ~8 min | ~$7 | -## Case studies +## Related docs -- [CRM Deduplication](https://futuresearch.ai/crm-deduplication/) — 500 rows down to 124 (75% were duplicates) -- [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/) — 98% accuracy handling career changes and typos +### Guides +- [Remove Duplicates from ML Training Data](/deduplicate-training-data-ml) +- [Resolve Duplicate Entities](/resolve-entities-python) 
+ +### Notebooks +- [Dedupe CRM Company Records](/notebooks/dedupe-crm-company-records) + +### Blog posts +- [CRM Deduplication](https://futuresearch.ai/crm-deduplication/) +- [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/) diff --git a/docs/reference/MERGE.md b/docs/reference/MERGE.md index a82eec1a..8e806c6a 100644 --- a/docs/reference/MERGE.md +++ b/docs/reference/MERGE.md @@ -65,8 +65,18 @@ A DataFrame with all left table columns plus matched right table columns. Rows t | 2,000 × 50 | ~8 min | ~$9 | | 1,000 × 1,000 | ~12 min | ~$15 | -## Case studies +## Related docs -- [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/) — 2,000 products to 50 vendors, 91% accuracy, zero false positives -- [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/) — 99.9% recall despite GitHub handles, typos, and partial emails -- [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/) — joining fund-level and contact-level data +### Guides +- [Fuzzy Join Without Matching Keys](/fuzzy-join-without-keys) + +### Notebooks +- [LLM Merging at Scale](/notebooks/llm-powered-merging-at-scale) +- [Match Software Vendors to Requirements](/notebooks/match-software-vendors-to-requirements) +- [Merge Contacts with Company Data](/notebooks/merge-contacts-with-company-data) +- [Merge Overlapping Contact Lists](/notebooks/merge-overlapping-contact-lists) + +### Blog posts +- [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/) +- [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/) +- [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/) diff --git a/docs/reference/RANK.md b/docs/reference/RANK.md index a03b6f35..c5b4fccb 100644 --- a/docs/reference/RANK.md +++ b/docs/reference/RANK.md @@ -81,7 +81,16 @@ When specifying a response model, make sure that it contains `field_name`. 
Other | `ascending_order` | bool | True = lowest first (default) | | `preview` | bool | True = process only a few rows | -## Case studies +## Related docs -- [Ranking 1000 Businesses by Data Fragmentation Risk](https://futuresearch.ai/lead-scoring-data-fragmentation/): Ranking 1,000 B2B leads by data fragmentation risk -- [Rank Leads Like an Analyst, Not a Marketer](https://futuresearch.ai/lead-scoring-without-crm/): Using `rank` to score leads instead of a CRM +### Guides +- [Sort a Dataset Using Web Data](/rank-by-external-metric) + +### Notebooks +- [Score Leads from Fragmented Data](/notebooks/score-leads-from-fragmented-data) +- [Score Leads Without CRM History](/notebooks/score-leads-without-crm-history) +- [Research and Rank Permit Times](/notebooks/research-and-rank-permit-times) + +### Blog posts +- [Ranking by Data Fragmentation Risk](https://futuresearch.ai/lead-scoring-data-fragmentation/) +- [Rank Leads Like an Analyst](https://futuresearch.ai/lead-scoring-without-crm/) diff --git a/docs/reference/RESEARCH.md b/docs/reference/RESEARCH.md index 2ac90be1..c32c9fcb 100644 --- a/docs/reference/RESEARCH.md +++ b/docs/reference/RESEARCH.md @@ -130,3 +130,13 @@ companies = await single_agent( return_table=True, # Return a table of companies ) ``` + +## Related docs + +### Guides +- [Add a Column with Web Lookup](/add-column-web-lookup) +- [Classify and Label Data with an LLM](/classify-dataframe-rows-llm) + +### Notebooks +- [Agent Map Regulatory Status](/notebooks/agent-map-regulatory-status) +- [LLM Web Research Agents at Scale](/notebooks/llm-web-research-agents-at-scale) diff --git a/docs/reference/SCREEN.md b/docs/reference/SCREEN.md index a17b5e1a..e1184b72 100644 --- a/docs/reference/SCREEN.md +++ b/docs/reference/SCREEN.md @@ -90,8 +90,19 @@ class Detailed(BaseModel): Compare: regex on "remote-friendly" job postings gets 68% precision. 
-## Case studies - -- [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/) — 63 of 502 S&P 500 companies passed, $3.29 -- [Job Posting Screen](https://futuresearch.ai/job-posting-screening/) — >90% precision vs 68% for regex -- [Screening Workflow](https://futuresearch.ai/screening-workflow/) — iterate on criteria without rerunning everything +## Related docs + +### Guides +- [Filter a DataFrame with LLMs](/filter-dataframe-with-llm) + +### Notebooks +- [LLM Screening at Scale](/notebooks/llm-powered-screening-at-scale) +- [Screen Job Postings by Criteria](/notebooks/screen-job-postings-by-criteria) +- [Screen Stocks by Investment Thesis](/notebooks/screen-stocks-by-investment-thesis) +- [Screen Stocks by Margin Sensitivity](/notebooks/screen-stocks-by-margin-sensitivity) +- [Multi-Stage Lead Qualification](/notebooks/multi-stage-lead-qualification) + +### Blog posts +- [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/) +- [Job Posting Screen](https://futuresearch.ai/job-posting-screening/) +- [Screening Workflow](https://futuresearch.ai/screening-workflow/)