Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs-site/src/app/globals.css
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ a:hover {
letter-spacing: 0.05em;
color: var(--muted);
margin-bottom: 0.5rem;
text-decoration: none;
display: block;
}

/* Hover colour for section titles rendered as links; the `a` qualifier keeps
   non-anchor elements with the same class unaffected. */
a.docs-sidebar-section-title:hover {
color: var(--foreground);
}

.docs-sidebar-nav {
Expand Down
10 changes: 9 additions & 1 deletion docs-site/src/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ const SECTION_DESCRIPTIONS: Record<string, string> = {
"Case Studies": "Real-world examples with Jupyter notebooks",
};

/**
 * Landing-card link targets keyed by navigation section title.
 * SectionCard uses the mapped path when the section title has an entry here
 * and otherwise falls back to the section's first item (`/${firstItem.slug}`).
 */
const SECTION_LINKS: Record<string, string> = {
"API Reference": "/api",
Guides: "/guides",
"Case Studies": "/notebooks",
};

// Title overrides keyed by navigation section title: maps "Overview" to "Getting Started".
// NOTE(review): the lookup site is outside this hunk — presumably SectionCard; confirm.
const SECTION_DISPLAY_TITLES: Record<string, string> = {
Overview: "Getting Started",
};
Expand All @@ -44,8 +50,10 @@ function SectionCard({ section }: { section: NavSection }) {

if (!firstItem) return null;

const href = SECTION_LINKS[section.title] || `/${firstItem.slug}`;

return (
<Link href={`/${firstItem.slug}`} className="landing-card">
<Link href={href} className="landing-card">
<div className="landing-card-icon" data-icon={icon}>
{icon === "rocket" && (
<svg
Expand Down
4 changes: 3 additions & 1 deletion docs-site/src/app/sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ export default function sitemap(): MetadataRoute.Sitemap {
const docSlugs = getDocSlugs();
const notebookSlugs = getNotebookSlugs();

const hubSlugs = new Set(["guides", "notebooks", "api"]);

const docPages = docSlugs.map((slug) => ({
url: `${baseUrl}/${slug}`,
lastModified: new Date(),
changeFrequency: "weekly" as const,
priority: 0.8,
priority: hubSlugs.has(slug) ? 0.9 : 0.8,
}));

const notebookPages = notebookSlugs.map((slug) => ({
Expand Down
29 changes: 21 additions & 8 deletions docs-site/src/components/Sidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,32 @@ export function Sidebar({ navigation, isOpen, onClose }: SidebarProps) {

{navigation.map((section) => (
<div key={section.title} className="docs-sidebar-section">
<div className="docs-sidebar-section-title">{section.title}</div>
{section.href ? (
<Link href={section.href} className="docs-sidebar-section-title" onClick={onClose}>
{section.title}
</Link>
) : (
<div className="docs-sidebar-section-title">{section.title}</div>
)}
<ul className="docs-sidebar-nav">
{section.items.map((item) => {
const isActive = currentSlug === item.slug;
const isExternal = item.href?.startsWith("http");
return (
<li key={item.slug}>
<Link
href={`/${item.slug}`}
className={isActive ? "active" : ""}
onClick={onClose}
>
{item.title}
</Link>
{isExternal ? (
<a href={item.href} target="_blank" rel="noopener noreferrer">
{item.title}
</a>
) : (
<Link
href={item.href || `/${item.slug}`}
className={isActive ? "active" : ""}
onClick={onClose}
>
{item.title}
</Link>
)}
</li>
);
})}
Expand Down
9 changes: 7 additions & 2 deletions docs-site/src/utils/docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ export function getDocSlugs(): string[] {
// Navigation structure
export interface NavSection {
title: string;
items: { slug: string; title: string }[];
href?: string;
items: { slug: string; title: string; href?: string }[];
}

export function getNavigation(): NavSection[] {
Expand All @@ -118,23 +119,27 @@ export function getNavigation(): NavSection[] {
{ slug: "chaining-operations", title: "Chaining Operations" },
{ slug: "installation", title: "Installation" },
{ slug: "skills-vs-mcp", title: "Skills vs MCP" },
{ slug: "api-key", title: "API Key", href: "https://everyrow.io/api-key" },
],
},
{
title: "API Reference",
href: "/api",
items: reference.map((d) => ({
slug: d.slug,
title: d.title.replace(/^reference\//, ""),
})),
},
{
title: "Guides",
href: "/guides",
items: guides
.filter((d) => !["getting-started", "chaining-operations", "installation", "skills-vs-mcp"].includes(d.slug))
.filter((d) => !["getting-started", "chaining-operations", "installation", "skills-vs-mcp", "guides", "notebooks", "api"].includes(d.slug))
.map((d) => ({ slug: d.slug, title: d.title })),
},
{
title: "Case Studies",
href: "/notebooks",
items: notebooks.map((n) => ({
slug: `notebooks/${n.slug}`,
title: n.title,
Expand Down
68 changes: 68 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
title: API Reference
description: Complete API reference for everyrow — screen, rank, dedupe, merge, and research operations powered by LLM web research agents.
---

# API Reference

Five operations for processing data with LLM-powered web research agents. Each takes a DataFrame and a natural-language instruction.

## screen

```python
result = await screen(task=..., input=df, response_model=Model)
```

`screen` takes a DataFrame and a natural-language filter predicate, evaluates each row using web research agents, and returns only the rows that pass. The filter condition does not need to be computable from existing columns. Agents can research external information to make the determination.

- [Full reference →](/reference/SCREEN)
- Guides: [Filter a DataFrame with LLMs](/filter-dataframe-with-llm)
- Notebooks: [LLM Screening at Scale](/notebooks/llm-powered-screening-at-scale), [Screen Stocks by Investment Thesis](/notebooks/screen-stocks-by-investment-thesis)

## rank

```python
result = await rank(task=..., input=df, field_name="score")
```

`rank` takes a DataFrame and a natural-language scoring criterion, dispatches web research agents to compute a score for each row, and returns the DataFrame sorted by that score. The sort key does not need to exist in your data. Agents derive it at runtime by searching the web, reading pages, and reasoning over what they find.

- [Full reference →](/reference/RANK)
- Guides: [Sort a Dataset Using Web Data](/rank-by-external-metric)
- Notebooks: [Score Leads from Fragmented Data](/notebooks/score-leads-from-fragmented-data), [Score Leads Without CRM History](/notebooks/score-leads-without-crm-history)

## dedupe

```python
result = await dedupe(input=df, equivalence_relation="...")
```

`dedupe` groups duplicate rows in a DataFrame based on a natural-language equivalence relation, assigns cluster IDs, and selects a canonical row per cluster. The duplicate criterion is semantic and LLM-powered: agents reason over the data and, when needed, search the web for external information to establish equivalence. This handles abbreviations, name variations, job changes, and entity relationships that no string similarity threshold can capture.

- [Full reference →](/reference/DEDUPE)
- Guides: [Remove Duplicates from ML Training Data](/deduplicate-training-data-ml), [Resolve Duplicate Entities](/resolve-entities-python)
- Notebooks: [Dedupe CRM Company Records](/notebooks/dedupe-crm-company-records)

## merge

```python
result = await merge(task=..., left_table=df1, right_table=df2)
```

`merge` left-joins two DataFrames using LLM-powered agents to resolve the key mapping instead of requiring exact or fuzzy key matches. Agents resolve semantic relationships (subsidiaries, regional names, abbreviations, and product-to-parent-company mappings) by reasoning over the data and, when needed, searching the web for external information to establish matches.

- [Full reference →](/reference/MERGE)
- Guides: [Fuzzy Join Without Matching Keys](/fuzzy-join-without-keys)
- Notebooks: [LLM Merging at Scale](/notebooks/llm-powered-merging-at-scale), [Match Software Vendors to Requirements](/notebooks/match-software-vendors-to-requirements)

## agent_map / single_agent

```python
result = await agent_map(task=..., input=df)
```

`single_agent` runs one web research agent on a single input (or no input). `agent_map` runs an agent on every row of a DataFrame in parallel. Both dispatch agents that search the web, read pages, and return structured results. The transform is live web research: agents fetch and synthesize external information to populate new columns.

- [Full reference →](/reference/RESEARCH)
- Guides: [Add a Column with Web Lookup](/add-column-web-lookup), [Classify and Label Data with an LLM](/classify-dataframe-rows-llm)
- Notebooks: [LLM Web Research Agents at Scale](/notebooks/llm-web-research-agents-at-scale), [Agent Map Regulatory Status](/notebooks/agent-map-regulatory-status)
30 changes: 30 additions & 0 deletions docs/guides.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
title: Guides
description: Step-by-step tutorials for using everyrow to screen, rank, dedupe, merge, and research data with LLM-powered agents.
---

# Guides

Practical walkthroughs that show you how to use everyrow for common data processing tasks. Each guide covers a single operation end-to-end with working code.

## Screen

- [Filter a DataFrame with LLMs](/filter-dataframe-with-llm)

## Rank

- [Sort a Dataset Using Web Data](/rank-by-external-metric)

## Dedupe

- [Remove Duplicates from ML Training Data](/deduplicate-training-data-ml)
- [Resolve Duplicate Entities](/resolve-entities-python)

## Merge

- [Fuzzy Join Without Matching Keys](/fuzzy-join-without-keys)

## Research

- [Add a Column with Web Lookup](/add-column-web-lookup)
- [Classify and Label Data with an LLM](/classify-dataframe-rows-llm)
41 changes: 41 additions & 0 deletions docs/notebooks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
---
title: Notebooks
description: Runnable notebooks demonstrating everyrow operations on real datasets — screen, rank, dedupe, merge, and research with LLM-powered agents.
---

# Notebooks

Runnable notebooks with real datasets. Each notebook demonstrates an everyrow operation end-to-end with output you can inspect.

## Screen

- [LLM-Powered Screening at Scale](/notebooks/llm-powered-screening-at-scale)
- [Screen Stocks by Investment Thesis](/notebooks/screen-stocks-by-investment-thesis)
- [Screen Stocks by Margin Sensitivity](/notebooks/screen-stocks-by-margin-sensitivity)
- [Screen Job Postings by Criteria](/notebooks/screen-job-postings-by-criteria)

## Rank

- [Score Leads from Fragmented Data](/notebooks/score-leads-from-fragmented-data)
- [Score Leads Without CRM History](/notebooks/score-leads-without-crm-history)
- [Research and Rank Permit Times](/notebooks/research-and-rank-permit-times)

## Dedupe

- [Dedupe CRM Company Records](/notebooks/dedupe-crm-company-records)

## Merge

- [LLM-Powered Merging at Scale](/notebooks/llm-powered-merging-at-scale)
- [Match Software Vendors to Requirements](/notebooks/match-software-vendors-to-requirements)
- [Merge Contacts with Company Data](/notebooks/merge-contacts-with-company-data)
- [Merge Overlapping Contact Lists](/notebooks/merge-overlapping-contact-lists)

## Research

- [LLM Web Research Agents at Scale](/notebooks/llm-web-research-agents-at-scale)
- [Agent Map Regulatory Status](/notebooks/agent-map-regulatory-status)

## Multi-Method

- [Multi-Stage Lead Qualification](/notebooks/multi-stage-lead-qualification)
14 changes: 11 additions & 3 deletions docs/reference/DEDUPE.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,15 @@ Output (selected rows only):
| 500 | ~2 min | ~$1.67 |
| 2,000 | ~8 min | ~$7 |

## Case studies
## Related docs

- [CRM Deduplication](https://futuresearch.ai/crm-deduplication/) — 500 rows down to 124 (75% were duplicates)
- [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/) — 98% accuracy handling career changes and typos
### Guides
- [Remove Duplicates from ML Training Data](/deduplicate-training-data-ml)
- [Resolve Duplicate Entities](/resolve-entities-python)

### Notebooks
- [Dedupe CRM Company Records](/notebooks/dedupe-crm-company-records)

### Blog posts
- [CRM Deduplication](https://futuresearch.ai/crm-deduplication/)
- [Researcher Deduplication](https://futuresearch.ai/researcher-dedupe-case-study/)
18 changes: 14 additions & 4 deletions docs/reference/MERGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,18 @@ A DataFrame with all left table columns plus matched right table columns. Rows t
| 2,000 × 50 | ~8 min | ~$9 |
| 1,000 × 1,000 | ~12 min | ~$15 |

## Case studies
## Related docs

- [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/) — 2,000 products to 50 vendors, 91% accuracy, zero false positives
- [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/) — 99.9% recall despite GitHub handles, typos, and partial emails
- [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/) — joining fund-level and contact-level data
### Guides
- [Fuzzy Join Without Matching Keys](/fuzzy-join-without-keys)

### Notebooks
- [LLM Merging at Scale](/notebooks/llm-powered-merging-at-scale)
- [Match Software Vendors to Requirements](/notebooks/match-software-vendors-to-requirements)
- [Merge Contacts with Company Data](/notebooks/merge-contacts-with-company-data)
- [Merge Overlapping Contact Lists](/notebooks/merge-overlapping-contact-lists)

### Blog posts
- [Software Supplier Matching](https://futuresearch.ai/software-supplier-matching/)
- [HubSpot Contact Merge](https://futuresearch.ai/merge-hubspot-contacts/)
- [CRM Merge Workflow](https://futuresearch.ai/crm-merge-workflow/)
15 changes: 12 additions & 3 deletions docs/reference/RANK.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,16 @@ When specifying a response model, make sure that it contains `field_name`. Other
| `ascending_order` | bool | True = lowest first (default) |
| `preview` | bool | True = process only a few rows |

## Case studies
## Related docs

- [Ranking 1000 Businesses by Data Fragmentation Risk](https://futuresearch.ai/lead-scoring-data-fragmentation/): Ranking 1,000 B2B leads by data fragmentation risk
- [Rank Leads Like an Analyst, Not a Marketer](https://futuresearch.ai/lead-scoring-without-crm/): Using `rank` to score leads instead of a CRM
### Guides
- [Sort a Dataset Using Web Data](/rank-by-external-metric)

### Notebooks
- [Score Leads from Fragmented Data](/notebooks/score-leads-from-fragmented-data)
- [Score Leads Without CRM History](/notebooks/score-leads-without-crm-history)
- [Research and Rank Permit Times](/notebooks/research-and-rank-permit-times)

### Blog posts
- [Ranking by Data Fragmentation Risk](https://futuresearch.ai/lead-scoring-data-fragmentation/)
- [Rank Leads Like an Analyst](https://futuresearch.ai/lead-scoring-without-crm/)
10 changes: 10 additions & 0 deletions docs/reference/RESEARCH.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,13 @@ companies = await single_agent(
return_table=True, # Return a table of companies
)
```

## Related docs

### Guides
- [Add a Column with Web Lookup](/add-column-web-lookup)
- [Classify and Label Data with an LLM](/classify-dataframe-rows-llm)

### Notebooks
- [Agent Map Regulatory Status](/notebooks/agent-map-regulatory-status)
- [LLM Web Research Agents at Scale](/notebooks/llm-web-research-agents-at-scale)
21 changes: 16 additions & 5 deletions docs/reference/SCREEN.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,19 @@ class Detailed(BaseModel):

Compare: regex on "remote-friendly" job postings gets 68% precision.

## Case studies

- [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/) — 63 of 502 S&P 500 companies passed, $3.29
- [Job Posting Screen](https://futuresearch.ai/job-posting-screening/) — >90% precision vs 68% for regex
- [Screening Workflow](https://futuresearch.ai/screening-workflow/) — iterate on criteria without rerunning everything
## Related docs

### Guides
- [Filter a DataFrame with LLMs](/filter-dataframe-with-llm)

### Notebooks
- [LLM Screening at Scale](/notebooks/llm-powered-screening-at-scale)
- [Screen Job Postings by Criteria](/notebooks/screen-job-postings-by-criteria)
- [Screen Stocks by Investment Thesis](/notebooks/screen-stocks-by-investment-thesis)
- [Screen Stocks by Margin Sensitivity](/notebooks/screen-stocks-by-margin-sensitivity)
- [Multi-Stage Lead Qualification](/notebooks/multi-stage-lead-qualification)

### Blog posts
- [Thematic Stock Screen](https://futuresearch.ai/thematic-stock-screening/)
- [Job Posting Screen](https://futuresearch.ai/job-posting-screening/)
- [Screening Workflow](https://futuresearch.ai/screening-workflow/)