Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/search.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,12 @@ jobs:
run: |
pnpm run codegen:examples
pnpm run embeddings
pnpm run embeddings:nimbus

- name: Refresh embeddings
working-directory: ./apps/docs
if: ${{ inputs.refresh }}
run: pnpm run embeddings:refresh
run: |
pnpm run codegen:examples
pnpm run embeddings:refresh
pnpm run embeddings:nimbus:refresh
7 changes: 6 additions & 1 deletion apps/docs/app/api/ai/docs/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { ApplicationError, UserError, clippy } from 'ai-commands/edge'
import { NextRequest, NextResponse } from 'next/server'
import OpenAI from 'openai'

import { isFeatureEnabled } from 'common/enabled-features'

export const runtime = 'edge'
/* To avoid OpenAI errors, restrict to the Vercel Edge Function regions that
overlap with the OpenAI API regions.
Expand Down Expand Up @@ -54,7 +56,10 @@ export async function POST(req: NextRequest) {
throw new UserError('Missing messages in request data')
}

const response = await clippy(openai, supabaseClient, messages)
const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
const response = await clippy(openai, supabaseClient, messages, {
useAltSearchIndex,
})

// Proxy the streamed SSE response from OpenAI
return new NextResponse(response.body, {
Expand Down
7 changes: 4 additions & 3 deletions apps/docs/next.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,10 @@ const nextConfig = {
]
},
typescript: {
// WARNING: production builds can successfully complete even if there are type errors
// Typechecking is checked separately via .github/workflows/typecheck.yml
ignoreBuildErrors: true,
// On previews, typechecking is run via GitHub Action only for efficiency
// On production, we turn it on to prevent errors from conflicting PRs getting into
// prod
ignoreBuildErrors: process.env.NEXT_PUBLIC_VERCEL_ENV === 'production' ? false : true,
},
eslint: {
// We are already running linting via GH action, this will skip linting during production build on Vercel
Expand Down
2 changes: 2 additions & 0 deletions apps/docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
"dev:secrets:pull": "AWS_PROFILE=supa-dev node ../../scripts/getSecrets.js -n local/docs",
"dev:watch:troubleshooting": "node ./scripts/troubleshooting/watch.mjs",
"embeddings": "tsx --conditions=react-server scripts/search/generate-embeddings.ts",
"embeddings:nimbus": "ENABLED_FEATURES_OVERRIDE_DISABLE_ALL=true pnpm run embeddings",
"embeddings:refresh": "pnpm run embeddings --refresh",
"embeddings:nimbus:refresh": "ENABLED_FEATURES_OVERRIDE_DISABLE_ALL=true pnpm run embeddings:refresh",
"last-changed": "tsx scripts/last-changed.ts",
"last-changed:reset": "pnpm run last-changed -- --reset",
"lint": "next lint",
Expand Down
1 change: 1 addition & 0 deletions apps/docs/public/humans.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ Tyler Fontaine
Tyler Shukert
TzeYiing L
Wen Bo Xie
Yuliya Marinova
Yuri Santana
____________

Expand Down
19 changes: 17 additions & 2 deletions apps/docs/resources/globalSearch/globalSearchModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { convertPostgrestToApiError, type ApiErrorGeneric } from '~/app/api/util
import { Result } from '~/features/helpers.fn'
import { openAI } from '~/lib/openAi'
import { supabase, type DatabaseCorrected } from '~/lib/supabase'

import { isFeatureEnabled } from 'common/enabled-features'
import { GuideModel } from '../guide/guideModel'
import {
DB_METADATA_TAG_PLATFORM_CLI,
Expand All @@ -13,6 +15,9 @@ import { ReferenceSDKFunctionModel, SDKLanguageValues } from '../reference/refer
import { TroubleshootingModel } from '../troubleshooting/troubleshootingModel'
import { SearchResultInterface } from './globalSearchInterface'

type SearchFunction = 'search_content' | 'search_content_nimbus'
type SearchHybridFunction = 'search_content_hybrid' | 'search_content_hybrid_nimbus'

export abstract class SearchResultModel {
static async search(
args: RootQueryTypeSearchDocsArgs,
Expand All @@ -22,9 +27,14 @@ export abstract class SearchResultModel {
const includeFullContent = requestedFields.includes('content')
const embeddingResult = await openAI().createContentEmbedding(query)

const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
const searchFunction: SearchFunction = useAltSearchIndex
? 'search_content_nimbus'
: 'search_content'

return embeddingResult.flatMapAsync(async ({ embedding }) => {
const matchResult = new Result(
await supabase().rpc('search_content', {
await supabase().rpc(searchFunction, {
embedding,
include_full_content: includeFullContent,
max_result: args.limit ?? undefined,
Expand All @@ -49,9 +59,14 @@ export abstract class SearchResultModel {
const includeFullContent = requestedFields.includes('content')
const embeddingResult = await openAI().createContentEmbedding(query)

const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
const searchFunction: SearchHybridFunction = useAltSearchIndex
? 'search_content_hybrid_nimbus'
: 'search_content_hybrid'

return embeddingResult.flatMapAsync(async ({ embedding }) => {
const matchResult = new Result(
await supabase().rpc('search_content_hybrid', {
await supabase().rpc(searchFunction, {
query_text: query,
query_embedding: embedding,
include_full_content: includeFullContent,
Expand Down
106 changes: 106 additions & 0 deletions apps/docs/scripts/search/embeddings/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/**
 * Summary of a single docs page whose sections are being embedded.
 * `sectionsCount` is the number of sections the page was split into,
 * used later to verify that every section was successfully inserted.
 */
export interface PageInfo {
  pageId: number
  path: string
  checksum: string
  sectionsCount: number
}

/**
 * One section of a page, prepared for the embeddings API.
 * `content` is the raw section text; `input` is the string actually sent
 * to the embedding model (presumably content plus heading context —
 * TODO confirm against the caller that builds these).
 */
export interface PageSectionForEmbedding {
  pageId: number
  path: string
  slug?: string
  heading?: string
  content: string
  input: string
  ragIgnore: boolean
}

/** A section paired with the embedding vector returned for its `input`. */
export interface PageSectionWithEmbedding extends PageSectionForEmbedding {
  embedding: number[]
}

/**
 * Aggregate outcome of an embedding run, accumulated across batches.
 * A page is "successful" only when all of its sections were inserted.
 */
export interface ProcessingResult {
  successfulPages: Set<number>
  failedPages: Set<number>
  totalSectionsProcessed: number
  totalSectionsInserted: number
}

/**
 * Split `array` into consecutive batches of at most `batchSize` elements.
 * The final batch may be shorter. An empty array yields no batches.
 */
export function createBatches<T>(array: T[], batchSize: number): T[][] {
  const result: T[][] = []
  let start = 0
  while (start < array.length) {
    result.push(array.slice(start, start + batchSize))
    start += batchSize
  }
  return result
}

/**
 * Pair each section in `batch` with the embedding returned for it.
 *
 * @param batch - Sections that were sent to the embeddings API, in order.
 * @param data - Per-section API results, expected to align 1:1 with `batch`.
 * @param batchNumber - Batch index, used only for error logging.
 * @returns Sections that received an embedding, plus the set of batch
 *   indexes that failed (missing embedding, or the whole batch on a
 *   length mismatch).
 */
export function mapEmbeddingsToSections(
  batch: PageSectionForEmbedding[],
  data: Array<{ embedding?: number[] }>,
  batchNumber: number
): {
  sectionsWithEmbeddings: PageSectionWithEmbedding[]
  failedSectionIndexes: Set<number>
} {
  const sectionsWithEmbeddings: PageSectionWithEmbedding[] = []
  const failedSectionIndexes: Set<number> = new Set()

  if (batch.length !== data.length) {
    console.error(
      `Ignoring all embeddings returned from batch ${batchNumber} because returned number doesn't match input number`
    )
    batch.forEach((_, index) => {
      failedSectionIndexes.add(index)
    })
    // Bail out now: without a 1:1 correspondence we cannot trust any
    // pairing. (Previously execution fell through into the loop below,
    // which crashed on `data[i].embedding` when `data` was shorter, and
    // could return sections as both "with embedding" and "failed".)
    return { sectionsWithEmbeddings, failedSectionIndexes }
  }

  for (let i = 0; i < batch.length; i++) {
    const embedding = data[i].embedding
    if (embedding) {
      sectionsWithEmbeddings.push({ ...batch[i], embedding })
    } else {
      failedSectionIndexes.add(i)
    }
  }

  return { sectionsWithEmbeddings, failedSectionIndexes }
}

/**
 * Increment the per-page inserted-section counters for every section
 * that was successfully embedded. Mutates `pageSectionsInserted` in place.
 */
export function updatePageInsertionCounts(
  pageSectionsInserted: Map<number, number>,
  sectionsWithEmbeddings: PageSectionWithEmbedding[]
) {
  for (const { pageId } of sectionsWithEmbeddings) {
    const previousCount = pageSectionsInserted.get(pageId) ?? 0
    pageSectionsInserted.set(pageId, previousCount + 1)
  }
}

/**
 * Classify every known page as successful or failed, mutating `result`
 * in place. A page succeeds only when its inserted-section count matches
 * its expected section count and it was not already marked failed;
 * otherwise it is recorded as failed and a shortfall warning is logged.
 */
export function computePageResults(
  pageInfoMap: Map<number, PageInfo>,
  pageSectionsInserted: Map<number, number>,
  result: ProcessingResult
) {
  pageInfoMap.forEach((pageInfo, pageId) => {
    const insertedCount = pageSectionsInserted.get(pageId) ?? 0
    const fullyInserted =
      insertedCount === pageInfo.sectionsCount && !result.failedPages.has(pageId)
    if (fullyInserted) {
      result.successfulPages.add(pageId)
      return
    }
    result.failedPages.add(pageId)
    console.warn(
      `Page ${pageInfo.path}: inserted ${insertedCount}/${pageInfo.sectionsCount} sections`
    )
  })
}

/**
 * Log one error line per failed section, identifying it by path/slug and
 * a 50-character preview of the input text that was sent for embedding.
 */
export function logFailedSections(
  batch: PageSectionForEmbedding[],
  inputs: string[],
  failedSectionIndexes: Set<number>
) {
  for (const i of failedSectionIndexes) {
    const section = batch[i]
    const preview = inputs[i]?.slice(0, 50)
    console.error(
      `Failed to process section: ${section.path}#${section.slug} (content: "${preview}...")`
    )
  }
}
Loading
Loading