diff --git a/docs-site/scripts/check-links.py b/docs-site/scripts/check-links.py index 7b235249..dcd2f02d 100644 --- a/docs-site/scripts/check-links.py +++ b/docs-site/scripts/check-links.py @@ -46,17 +46,12 @@ # Each URL must be listed explicitly — new links to the same domain will # error until added here, so broken links don't slip through unnoticed. SKIPPED_URLS: set[str] = { - "https://ai.google.dev/gemini-api/docs/gemini-3", - "https://ai.google.dev/gemini-api/docs/structured-output", - "https://arxiv.org/abs/2506.21558", "https://clinicaltrials.gov/", "https://clinicaltrials.gov/data-api/about-api", "https://code.claude.com/docs/en/discover-plugins", "https://code.claude.com/docs/en/mcp", "https://cursor.com/deeplink/mcp-install-dark.svg", "https://cursor.com/docs/context/skills", - "https://developers.openai.com/api/docs/guides/structured-outputs/", - "https://developers.openai.com/api/reference/resources/responses/methods/create", "https://developers.openai.com/codex/mcp/", "https://developers.openai.com/codex/skills", "https://docs.astral.sh/uv/", @@ -73,9 +68,9 @@ "https://hugovk.github.io/top-pypi-packages/", "https://jqlang.org/", "https://pip.pypa.io/en/stable/", - "https://platform.claude.com/docs/en/build-with-claude/extended-thinking", "https://www.kaggle.com/code/rafaelpoyiadzi/active-learning-with-an-llm-oracle", "https://www.kaggle.com/datasets/tunguz/pubmed-title-abstracts-2019-baseline", + "https://arxiv.org/abs/2506.21558", } diff --git a/docs-site/src/app/blog/[slug]/page.tsx b/docs-site/src/app/blog/[slug]/page.tsx deleted file mode 100644 index 3e101d38..00000000 --- a/docs-site/src/app/blog/[slug]/page.tsx +++ /dev/null @@ -1,62 +0,0 @@ -import { notFound } from "next/navigation"; -import { DocsLayout } from "@/components/DocsLayout"; -import { MDXContent } from "@/components/MDXContent"; -import { getNavigation } from "@/utils/docs"; -import { getBlogPostBySlug, getBlogPostSlugs, formatDate } from "@/utils/blog"; - -interface PageProps { - params: Promise<{ slug: string }>; -} - -export async function generateStaticParams() { - const slugs = getBlogPostSlugs(); - return slugs.map((slug) => ({ slug })); -} - -export async function generateMetadata({ params }: PageProps) { - const { slug } = await params; - const post = getBlogPostBySlug(slug); - - if (!post) { - return { title: "Not Found" }; - } - - const canonicalUrl = `https://everyrow.io/docs/blog/${slug}`; - - return { - title: post.title, - description: post.description, - alternates: { canonical: canonicalUrl }, - openGraph: { - title: post.title, - description: post.description, - url: canonicalUrl, - images: [{ url: "https://everyrow.io/everyrow-og.png" }], - }, - }; -} - -export default async function BlogPostPage({ params }: PageProps) { - const { slug } = await params; - const post = getBlogPostBySlug(slug); - - if (!post) { - notFound(); - } - - const navigation = getNavigation(); - - return ( - -
-
- {post.date && {formatDate(post.date)}} - {post.authors.length > 0 && ( - {post.authors.join(", ")} - )} -
- -
-
- ); -} diff --git a/docs-site/src/app/blog/page.tsx b/docs-site/src/app/blog/page.tsx deleted file mode 100644 index c396c7ef..00000000 --- a/docs-site/src/app/blog/page.tsx +++ /dev/null @@ -1,53 +0,0 @@ -import type { Metadata } from "next"; -import Link from "next/link"; -import { DocsLayout } from "@/components/DocsLayout"; -import { getNavigation } from "@/utils/docs"; -import { getAllBlogPosts, formatDate } from "@/utils/blog"; - -export const metadata: Metadata = { - title: "Blog - Everyrow", - description: - "Technical blog posts from the everyrow team about LLMs, data processing, and building AI-powered infrastructure.", - alternates: { - canonical: "https://everyrow.io/docs/blog", - }, - openGraph: { - title: "Blog - Everyrow", - description: - "Technical blog posts from the everyrow team about LLMs, data processing, and building AI-powered infrastructure.", - url: "https://everyrow.io/docs/blog", - images: [{ url: "https://everyrow.io/everyrow-og.png" }], - }, -}; - -export default async function BlogPage() { - const navigation = getNavigation(); - const posts = getAllBlogPosts(); - - return ( - -

Blog

-

- Technical posts from the everyrow team. -

-
- {posts.map((post) => ( - -
- {post.date && {formatDate(post.date)}} - {post.authors.length > 0 && {post.authors.join(", ")}} -
-

{post.title}

- {post.description && ( -

{post.description}

- )} - - ))} -
-
- ); -} diff --git a/docs-site/src/app/globals.css b/docs-site/src/app/globals.css index f34b275c..e639da2a 100644 --- a/docs-site/src/app/globals.css +++ b/docs-site/src/app/globals.css @@ -71,11 +71,6 @@ a:hover { opacity: 0.8; } -.docs-sidebar-logo-chips { - display: flex; - gap: 0.375rem; -} - .docs-sidebar-logo-chip { font-family: var(--font-inter), -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; font-size: 0.6875rem; @@ -702,76 +697,6 @@ noscript + .installation-tabs .tab-content:first-child { display: none; } -/* Blog styles */ -.blog-post-meta { - display: flex; - align-items: center; - gap: 0.75rem; - font-size: 0.875rem; - color: var(--muted); - margin-bottom: 0.25rem; -} - -.blog-post-author::before { - content: "\00b7"; - margin-right: 0.75rem; -} - -.blog-listing-subtitle { - font-size: 1rem; - color: var(--muted); - margin-bottom: 2rem; -} - -.blog-listing { - display: flex; - flex-direction: column; - gap: 1rem; -} - -.blog-listing-card { - display: block; - padding: 1.25rem; - border: 1px solid var(--border); - border-radius: 0.75rem; - text-decoration: none; - color: inherit; - transition: border-color 0.15s; -} - -.blog-listing-card:hover { - border-color: var(--accent); - text-decoration: none; -} - -.blog-listing-card-meta { - display: flex; - align-items: center; - gap: 0.75rem; - font-size: 0.8rem; - color: var(--muted); - margin-bottom: 0.5rem; -} - -.blog-listing-card-meta span::before { - content: "\00b7"; - margin-right: 0.75rem; -} - -.blog-listing-card-title { - font-size: 1.125rem; - font-weight: 600; - margin: 0 0 0.5rem; - color: var(--foreground); -} - -.blog-listing-card-description { - font-size: 0.875rem; - color: var(--muted); - line-height: 1.5; - margin: 0; -} - /* Mobile responsive */ @media (max-width: 768px) { /* Hide desktop elements on mobile */ diff --git a/docs-site/src/app/page.tsx b/docs-site/src/app/page.tsx index c7b0e7a1..7bbb097a 100644 --- a/docs-site/src/app/page.tsx +++ b/docs-site/src/app/page.tsx @@ -82,7 +82,8 @@ function SectionCard({ section }: { section: NavSection }) { - )} + )} +

{section.title}

{description}

diff --git a/docs-site/src/app/sitemap.ts b/docs-site/src/app/sitemap.ts index 6c0bda81..566f6052 100644 --- a/docs-site/src/app/sitemap.ts +++ b/docs-site/src/app/sitemap.ts @@ -1,7 +1,6 @@ import { MetadataRoute } from "next"; import { getDocSlugs } from "@/utils/docs"; import { getNotebookSlugs } from "@/utils/notebooks"; -import { getBlogPostSlugs } from "@/utils/blog"; export const dynamic = "force-static"; @@ -10,9 +9,8 @@ export default function sitemap(): MetadataRoute.Sitemap { const docSlugs = getDocSlugs(); const notebookSlugs = getNotebookSlugs(); - const blogSlugs = getBlogPostSlugs(); - const hubSlugs = new Set(["guides", "case-studies", "api", "blog"]); + const hubSlugs = new Set(["guides", "case-studies", "api"]); const docPages = docSlugs.map((slug) => ({ url: `${baseUrl}/${slug}`, @@ -28,13 +26,6 @@ export default function sitemap(): MetadataRoute.Sitemap { priority: 0.7, })); - const blogPages = blogSlugs.map((slug) => ({ - url: `${baseUrl}/blog/${slug}`, - lastModified: new Date(), - changeFrequency: "monthly" as const, - priority: 0.7, - })); - return [ { url: baseUrl, @@ -44,6 +35,5 @@ export default function sitemap(): MetadataRoute.Sitemap { }, ...docPages, ...notebookPages, - ...blogPages, ]; } diff --git a/docs-site/src/blog/llm-provider-quirks.mdx b/docs-site/src/blog/llm-provider-quirks.mdx deleted file mode 100644 index 27750451..00000000 --- a/docs-site/src/blog/llm-provider-quirks.mdx +++ /dev/null @@ -1,181 +0,0 @@ ---- -title: "The Hidden Incompatibilities Between LLM Providers" -subtitle: "What we learned building a system that interchangably supports Anthropic, Google and OpenAI" -date: 2026-02-24 -description: "LLM APIs look interchangeable on paper. In practice, they diverge in subtle ways that break your code. We document the provider-specific quirks we've hit while running thousands of LLM calls per day across Anthropic, Google, and OpenAI." -authors: - - "Robert Gambee" -tags: - - "LLMs" - - "AI Infrastructure" - - "Anthropic" - - "Google Gemini" - - "OpenAI" - - "JSON Schema" - - "Tool Calling" - - "AI Engineering" ---- - -# The Hidden Incompatibilities Between LLM Providers - -If you've ever tried to swap one LLM provider for another, you've probably noticed that things aren't as interchangeable as the docs suggest. On paper, the APIs are converging: they all accept messages, they all support tool calling, they all accept JSON Schemas for structured output. In practice, each provider has opinions about what "valid" means, and those opinions don't always agree with each other or with the relevant specs. - -At FutureSearch, our [everyrow.io](https://everyrow.io/) app is powered by tens of thousands of LLM calls per day across Anthropic, Google, and OpenAI. Our system needs to be able to route any task to any provider depending on performance, cost, and availability. There are middleware services which aim to abstract away the provider-specific differences. But they aren't perfect, so we often need to address these quirks ourselves. - -Over the past year, we've accumulated a collection of provider-specific patches and workarounds. None of them are individually complicated, but together they paint a picture of an ecosystem that's less standardized than it appears. - -This post documents the quirks we've found. Some of these might save you a debugging session. - -## JSON Schema: three providers, three interpretations - -Structured output is one of the most useful features of modern LLM APIs. You pass a JSON Schema, and the model returns output that conforms to it. Except "conforms to it" means different things to different providers. - -### Gemini requires array items to have a type - -Consider this JSON Schema: - -```json -{ - "type": "array", - "items": {} -} -``` - -In JSON Schema, `items: {}` means "the array can contain elements of any type." If you write a Pydantic model with a bare `list` annotation, it will produce a schema like this. However, Gemini requires `items` to have an explicit `type` field, so this gets rejected at the API level. Unfortunately, [the structured output documentation](https://ai.google.dev/gemini-api/docs/structured-output) doesn't mention this requirement. - -Our fix replaces empty items with `{"type": "string"}` for Gemini requests. It's not semantically identical, but string is the most universal type and it works in every case we've encountered: - -```python -def fix_empty_items(schema: dict) -> dict: - if schema.get("items") == {}: - schema["items"] = {"type": "string"} - for key, value in schema.items(): - if isinstance(value, dict): - schema[key] = fix_empty_items(value) - elif isinstance(value, list): - schema[key] = [ - fix_empty_items(item) - if isinstance(item, dict) else item - for item in value - ] - return schema -``` - -The fix needs to be applied recursively to the entire schema. And it needs to be applied in two cases: when generating structured output and when specifying tool signatures. Easy to miss the second one. - -Interestingly, I can reproduce the problem when using their API directly but not with their `genai` Python package. Perhaps they're aware of this issue, but they decided to address it in the Python package rather than in their backend. Or more likely: the team that maintains the Python package got fed up waiting for the backend team to do something about this. - -### OpenAI can't agree with itself on structured output - -When generating structured output with OpenAI's `responses` API, you need to pass the schema in a specific format: - -```json -{ - "format": { - "type": "json_schema", - "name": "Response", - "description": "A response to the user's question", - "schema": { - "type": "object", - "properties": { - "answer": { - "type": "string" - } - }, - "required": ["answer"] - } - } -} -``` - -But the `completions` API requires this instead: - -```json -{ - "response_format": { - "type": "json_schema", - "json_schema": { - "name": "Response", - "description": "A response to the user's question", - "schema": { - "type": "object", - "properties": { - "answer": { - "type": "string" - } - }, - "required": ["answer"] - } - } - } -} -``` - -Some key names are slightly different: `format`/`response_format`, `schema`/`json_schema`. And the `name` and `description` are nested at different levels. These are easy to overlook, and [the documentation](https://developers.openai.com/api/docs/guides/structured-outputs/) doesn't make this clear. The only way to figure it out is to read the source code. It's obvious these two APIs were designed by different teams. - -### No provider supports top-level $ref - -JSON Schema has the concept of definitions and references, which allows you to define a schema in one place and reuse it in another. These are helpful if you want to reuse a subschema without repeating yourself. And Pydantic automatically generates them for certain model hierarchies. Here's an example: - -```json -{ - "$ref": "#/$defs/Response", - "$defs": { - "Response": { - "type": "object", - "properties": { - "answer": { - "type": "string" - } - }, - "required": ["answer"] - } - } -} -``` - -That's a perfectly valid JSON Schema. But none of the major LLM providers accept this. They don't allow the root to be a `$ref`. - -The solution is straightforward enough: find the `$ref`, look it up in `$defs`, and use the resolved definition as the top-level schema. But it's the kind of thing that works fine in tests with simple response formats, only to break in production when your schema happens to be structured differently. - -## Tool calling: Anthropic can't force tool calls when thinking - -When you set `tool_choice: {"type": "any"}` on a Claude model with extended thinking enabled, Anthropic returns an error. You're not allowed to force tool use while the model is in thinking mode. - -This is a real problem if you have a system that relies on structured tool call outputs, because reasoning models are often the ones you want for complex tasks. Our workaround has two steps: first, call the thinking model with `tool_choice: {"type": "auto"}`. If the model happens to return tool calls, great. If it doesn't, make a second call with extended thinking disabled and force tool use with `tool_choice: {"type": "any"}`. - -It's not ideal since it sometimes means an extra API call, but it works. OpenAI and Gemini don't have this restriction. To be fair to Anthropic, at least they [document this clearly](https://platform.claude.com/docs/en/build-with-claude/extended-thinking#extended-thinking-with-tool-use). - -## Prompt caching: only Anthropic makes you do the work - -Anthropic is the only major provider where prompt caching requires explicit markers in your request. You add `cache_control: {"type": "ephemeral"}` to specific messages that mark cache breakpoints, and Anthropic caches everything up to that point. On cache hits, you pay 90% less for the cached tokens (but 25% more on the initial write). - -OpenAI and Google handle caching automatically on their end. You don't need to do anything. - -The upside of Anthropic's approach is control: you can be strategic about where your cache boundaries fall. The downside is that it's one more thing to get right. And at scale, the cost of getting it wrong can be huge! At one point, we were wasting thousands of dollars per month because of incorrect cache markers. - -In our implementation, we identify the last message in the first contiguous block of cacheable messages and add the cache marker there. It's only a few dozen lines of code. But it's logic that the other providers have internalized on your behalf. - -## Temperature constraints for reasoning models - -Back in the early days of LLMs, temperature was an important parameter for controlling the "creativity" of the model's output. It was fun to turn it up and watch the model's response unravel into utter nonsense. - -But with the rise of reasoning models, temperature has become deprecated. The big three providers restrict it to various degrees. - -- Google still supports temperature for Gemini 3 models, but [it strongly discourages](https://ai.google.dev/gemini-api/docs/gemini-3#temperature) setting it to anything other than 1. -- Anthropic [requires](https://platform.claude.com/docs/en/build-with-claude/extended-thinking#feature-compatibility) temperature to be set to 1 when extended thinking is enabled. -- OpenAI's o3 and GPT-5 families don't support temperature at all. But [their responses API docs](https://developers.openai.com/api/reference/resources/responses/methods/create) still lists temperature as a parameter with no mention of its lack of support. - -## Living with the mess - -None of these issues are showstoppers. They're all fixable with a few lines of code each. But they add up. Our LLM client module has accumulated hundreds of lines of provider-specific logic, and new patches keep coming. - -There are solutions out there which claim to abstract away all these silly details and provide a clean, unified interface. But in our experience, these glue layers are highly imperfect and introduce their own set of bugs and edge cases, requiring even more patches and workarounds. - -But this is what it's like to work at the bleeding edge. If you're building multi-provider LLM infrastructure and have encountered other quirks not covered here, we'd love to hear about them. Our team at FutureSearch works on this stuff daily to make [everyrow.io](https://everyrow.io/) the best it can possibly be. - -## Related - -- [Anthropic Is the Only Frontier Lab Where Reasoning Effort Scales on Deep Research Bench](https://futuresearch.ai/effort-scaling) -- [Higher effort settings in LLMs can reduce accuracy](https://futuresearch.ai/effort-paradox) -- [Deep Research Benchmark: Evaluating LLM Web Research Agents](https://futuresearch.ai/deep-research-bench) diff --git a/docs-site/src/components/Sidebar.tsx b/docs-site/src/components/Sidebar.tsx index 479b2a85..facd8243 100644 --- a/docs-site/src/components/Sidebar.tsx +++ b/docs-site/src/components/Sidebar.tsx @@ -21,10 +21,7 @@ export function Sidebar({ navigation, isOpen, onClose }: SidebarProps) {