diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 11ff8c721..4f703c6d1 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -77,8 +77,6 @@ jobs: env: {} - package: graphile/graphile-pgvector-plugin env: {} - - package: graphile/postgraphile-plugin-pgvector - env: {} - package: graphql/server-test env: {} - package: graphql/env diff --git a/GRAPHILE.md b/GRAPHILE.md index a483fa944..91854d48a 100644 --- a/GRAPHILE.md +++ b/GRAPHILE.md @@ -59,7 +59,7 @@ All Graphile RC dependencies are pinned to **exact versions** (no `^` or `~` pre - **graphile-cache** -- LRU cache with PostGraphile v5 integration - **graphile-test** -- PostGraphile v5 testing utilities - **graphile-authz** -- Dynamic authorization plugin for PostGraphile v5 -- **postgraphile-plugin-pgvector** -- pgvector similarity search plugin for PostGraphile v5 +- **graphile-pgvector-plugin** -- pgvector codec + auto-discovered vector search plugin for PostGraphile v5 ### `graphql/` packages diff --git a/graphile/graphile-pgvector-plugin/package.json b/graphile/graphile-pgvector-plugin/package.json index eac96dbfa..302b8e2f9 100644 --- a/graphile/graphile-pgvector-plugin/package.json +++ b/graphile/graphile-pgvector-plugin/package.json @@ -37,12 +37,19 @@ "pgsql-test": "workspace:^" }, "peerDependencies": { + "@dataplan/pg": "1.0.0-rc.5", "graphile-build": "5.0.0-rc.4", "graphile-build-pg": "5.0.0-rc.5", "graphile-config": "1.0.0-rc.5", "graphql": "^16.9.0", "pg-sql2": "5.0.0-rc.4", - "postgraphile": "5.0.0-rc.7" + "postgraphile": "5.0.0-rc.7", + "postgraphile-plugin-connection-filter": "3.0.0-rc.1" + }, + "peerDependenciesMeta": { + "postgraphile-plugin-connection-filter": { + "optional": true + } }, "keywords": [ "postgraphile", diff --git a/graphile/graphile-pgvector-plugin/src/__tests__/vector-search.test.ts b/graphile/graphile-pgvector-plugin/src/__tests__/vector-search.test.ts new file mode 100644 index 000000000..c640ae29d --- 
/dev/null +++ b/graphile/graphile-pgvector-plugin/src/__tests__/vector-search.test.ts @@ -0,0 +1,368 @@ +import { join } from 'path'; +import { getConnections, seed } from 'graphile-test'; +import type { GraphQLResponse } from 'graphile-test'; +import type { PgTestClient } from 'pgsql-test'; +import { VectorCodecPreset } from '../vector-codec'; +import { createVectorSearchPlugin } from '../vector-search'; + +interface AllDocumentsResult { + allDocuments: { + nodes: Array<{ + rowId: number; + title: string; + content: string | null; + embedding: number[]; + embeddingDistance: number | null; + }>; + }; +} + +type QueryFn = ( + query: string, + variables?: Record +) => Promise>; + +describe('VectorSearchPlugin', () => { + let db: PgTestClient; + let teardown: () => Promise; + let query: QueryFn; + + beforeAll(async () => { + const testPreset = { + extends: [ + VectorCodecPreset, + { + plugins: [createVectorSearchPlugin({ defaultMetric: 'COSINE' })], + }, + ], + }; + + const connections = await getConnections({ + schemas: ['pgvector_test'], + preset: testPreset, + useRoot: true, + authRole: 'postgres', + }, [ + seed.sqlfile([join(__dirname, './setup.sql')]) + ]); + + db = connections.db; + teardown = connections.teardown; + query = connections.query; + + // Start a transaction for savepoint-based test isolation + await db.client.query('BEGIN'); + }); + + afterAll(async () => { + if (db) { + try { + await db.client.query('ROLLBACK'); + } catch { + // Ignore rollback errors + } + } + + if (teardown) { + await teardown(); + } + }); + + beforeEach(async () => { + await db.beforeEach(); + }); + + afterEach(async () => { + await db.afterEach(); + }); + + describe('condition field (vectorEmbedding)', () => { + it('filters by vector similarity with distance threshold', async () => { + const result = await query(` + query { + allDocuments(condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: COSINE + distance: 0.5 + } + }) { + nodes { + rowId + title + 
embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + expect(nodes!.length).toBeGreaterThan(0); + + // Document A [1,0,0] is identical to query — distance ~0 + // Only docs within distance 0.5 should be returned + const titles = nodes!.map(n => n.title); + expect(titles).toContain('Document A'); + }); + + it('returns embeddingDistance computed field when condition is active', async () => { + const result = await query(` + query { + allDocuments(condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: COSINE + } + }) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + + // All nodes should have a distance value since the condition is active + for (const node of nodes!) { + expect(node.embeddingDistance).toBeDefined(); + expect(typeof node.embeddingDistance).toBe('number'); + } + + // Document A [1,0,0] should have distance ~0 to query [1,0,0] + const docA = nodes!.find(n => n.title === 'Document A'); + expect(docA).toBeDefined(); + expect(docA!.embeddingDistance).toBeCloseTo(0, 2); + }); + + it('returns null for embeddingDistance when no condition is active', async () => { + const result = await query(` + query { + allDocuments { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + + for (const node of nodes!) 
{ + expect(node.embeddingDistance).toBeNull(); + } + }); + + it('supports L2 metric', async () => { + const result = await query(` + query { + allDocuments(condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: L2 + } + }) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + + // L2 distance of identical vectors is 0 + const docA = nodes!.find(n => n.title === 'Document A'); + expect(docA).toBeDefined(); + expect(docA!.embeddingDistance).toBeCloseTo(0, 2); + }); + + it('supports IP metric', async () => { + const result = await query(` + query { + allDocuments(condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: IP + } + }) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + + // Inner product of [1,0,0] with itself is 1, pgvector returns negative: -1 + const docA = nodes!.find(n => n.title === 'Document A'); + expect(docA).toBeDefined(); + expect(docA!.embeddingDistance).toBeCloseTo(-1, 2); + }); + }); + + describe('orderBy (EMBEDDING_DISTANCE_ASC/DESC)', () => { + it('orders by distance ascending when condition is active', async () => { + const result = await query(` + query { + allDocuments( + condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: COSINE + } + } + orderBy: EMBEDDING_DISTANCE_ASC + ) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + expect(nodes!.length).toBeGreaterThan(1); + + // Should be ordered by distance ascending (closest first) + // Document A [1,0,0] should be first (distance ~0) + expect(nodes![0].title).toBe('Document A'); + + // Verify ordering: each distance should be <= next + for (let i = 0; i < nodes!.length - 1; 
i++) { + expect(nodes![i].embeddingDistance).toBeLessThanOrEqual( + nodes![i + 1].embeddingDistance! + ); + } + }); + + it('orders by distance descending when condition is active', async () => { + const result = await query(` + query { + allDocuments( + condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: COSINE + } + } + orderBy: EMBEDDING_DISTANCE_DESC + ) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + expect(nodes!.length).toBeGreaterThan(1); + + // Should be ordered by distance descending (farthest first) + // Document A [1,0,0] should be last (distance ~0) + expect(nodes![nodes!.length - 1].title).toBe('Document A'); + + // Verify ordering: each distance should be >= next + for (let i = 0; i < nodes!.length - 1; i++) { + expect(nodes![i].embeddingDistance).toBeGreaterThanOrEqual( + nodes![i + 1].embeddingDistance! + ); + } + }); + }); + + describe('composability', () => { + it('combines vector distance threshold with ordering', async () => { + // Use a tight distance threshold to filter, then order by distance + const result = await query(` + query { + allDocuments( + condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: COSINE + distance: 0.5 + } + } + orderBy: EMBEDDING_DISTANCE_ASC + ) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + // Only documents within cosine distance 0.5 of [1,0,0] should be returned + // Document A [1,0,0] → distance ~0 (included) + // Document D [0.707,0.707,0] → distance ~0.293 (included) + // Document E [0.577,0.577,0.577] → distance ~0.423 (included) + // Document B [0,1,0] → distance ~1.0 (excluded) + // Document C [0,0,1] → distance ~1.0 (excluded) + expect(nodes!.length).toBeGreaterThanOrEqual(1); + 
expect(nodes!.length).toBeLessThanOrEqual(3); + + // First result should be closest: Document A + expect(nodes![0].title).toBe('Document A'); + + // All returned distances should be <= 0.5 + for (const node of nodes!) { + expect(node.embeddingDistance).toBeLessThanOrEqual(0.5); + } + }); + + it('works with pagination (first/offset)', async () => { + const result = await query(` + query { + allDocuments( + condition: { + vectorEmbedding: { + vector: [1, 0, 0] + metric: COSINE + } + } + orderBy: EMBEDDING_DISTANCE_ASC + first: 2 + ) { + nodes { + title + embeddingDistance + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + expect(nodes!.length).toBe(2); + // Closest should be Document A + expect(nodes![0].title).toBe('Document A'); + }); + }); +}); diff --git a/graphile/graphile-pgvector-plugin/src/index.ts b/graphile/graphile-pgvector-plugin/src/index.ts index 9760a7aa0..e339ef6d9 100644 --- a/graphile/graphile-pgvector-plugin/src/index.ts +++ b/graphile/graphile-pgvector-plugin/src/index.ts @@ -1,16 +1,26 @@ /** * graphile-pgvector-plugin * - * PostGraphile v5 codec plugin for pgvector. - * Teaches the schema builder what the `vector` type is so that: - * - vector(n) columns appear on output types and in create/update mutations - * - SQL functions with vector arguments are exposed automatically - * - A `Vector` GraphQL scalar (serialized as [Float]) handles I/O + * PostGraphile v5 plugin suite for pgvector. + * + * Provides two plugins: + * + * 1. **VectorCodecPlugin** — Teaches the schema builder what the `vector` type is so that: + * - vector(n) columns appear on output types and in create/update mutations + * - SQL functions with vector arguments are exposed automatically + * - A `Vector` GraphQL scalar (serialized as [Float]) handles I/O + * + * 2. 
**VectorSearchPlugin** — Auto-discovers all vector columns and adds: + * - `vector{Column}` condition fields on connections (filter by distance), e.g. `vectorEmbedding` + * - `{column}Distance` computed fields on output types, e.g. `embeddingDistance` + * - `{COLUMN}_DISTANCE_ASC/DESC` orderBy enum values, e.g. `EMBEDDING_DISTANCE_ASC` + * - NOTE(review): a `closeTo` connection filter operator for the Vector scalar is not registered by `./vector-search` — confirm or drop * * @example * ```typescript * import { VectorCodecPreset } from 'graphile-pgvector-plugin'; * + * // Codec only — also add `{ plugins: [createVectorSearchPlugin()] }` to enable the search features * const preset = { * extends: [VectorCodecPreset], * }; @@ -18,3 +28,5 @@ */ export { VectorCodecPlugin, VectorCodecPreset } from './vector-codec'; +export { VectorSearchPlugin, createVectorSearchPlugin } from './vector-search'; +export type { VectorSearchPluginOptions, VectorMetric } from './types'; diff --git a/graphile/graphile-pgvector-plugin/src/types.ts b/graphile/graphile-pgvector-plugin/src/types.ts new file mode 100644 index 000000000..bfc4109a2 --- /dev/null +++ b/graphile/graphile-pgvector-plugin/src/types.ts @@ -0,0 +1,38 @@ +/** + * graphile-pgvector-plugin Types + * + * Type definitions for the vector search plugin configuration. + */ + +/** + * Supported vector similarity metrics. + * - COSINE: Cosine distance (1 - cosine similarity) + * - L2: Euclidean (L2) distance + * - IP: Inner product (negative, for ordering) + */ +export type VectorMetric = 'COSINE' | 'L2' | 'IP'; + +/** + * Plugin configuration options for VectorSearchPlugin. + */ +export interface VectorSearchPluginOptions { + /** + * Default similarity metric to use when not specified in queries. + * @default 'COSINE' + */ + defaultMetric?: VectorMetric; + + /** + * Maximum limit for vector search results (top-level query fields). NOTE(review): createVectorSearchPlugin reads this option but never applies it. + * @default 100 + */ + maxLimit?: number; + + /** + * Prefix for vector condition fields on connection condition inputs. + * For example, with prefix 'vector' and a column named 'embedding', + * the generated condition field will be 'vectorEmbedding'. 
+ * @default 'vector' + */ + conditionPrefix?: string; +} diff --git a/graphile/graphile-pgvector-plugin/src/vector-codec.ts b/graphile/graphile-pgvector-plugin/src/vector-codec.ts index 3d58eff96..8d28251e1 100644 --- a/graphile/graphile-pgvector-plugin/src/vector-codec.ts +++ b/graphile/graphile-pgvector-plugin/src/vector-codec.ts @@ -125,3 +125,6 @@ export const VectorCodecPlugin: GraphileConfig.Plugin = { export const VectorCodecPreset: GraphileConfig.Preset = { plugins: [VectorCodecPlugin], }; + +// Note: VectorCodecPreset registers the codec plugin only. To enable vector search, +// also add createVectorSearchPlugin() from ./vector-search to your preset's plugins. diff --git a/graphile/graphile-pgvector-plugin/src/vector-search.ts b/graphile/graphile-pgvector-plugin/src/vector-search.ts new file mode 100644 index 000000000..b4e650ab8 --- /dev/null +++ b/graphile/graphile-pgvector-plugin/src/vector-search.ts @@ -0,0 +1,481 @@ +/** + * VectorSearchPlugin + * + * Auto-discovers all `vector` columns across all tables and adds: + * + * 1. **vectorSearch** query fields on Query — NOT implemented in this file + * - None of the hooks below add fields to Query; the `maxLimit` option is read but unused + * - Use the condition field and orderBy values below for nearest-neighbour queries + * + * 2. **vector{Column}** condition fields (type `VectorNearbyInput`) on connection condition inputs + * - Accepts { vector, metric?, distance? } to filter by distance threshold + * - Computes distance server-side using pgvector operators + * + * 3. **{column}Distance** computed fields on output types + * - Returns the distance value when a nearby condition is active (null otherwise) + * + * 4. **{COLUMN}_DISTANCE_ASC/DESC** orderBy enum values + * - Orders results by vector distance when a nearby condition is active + * + * Follows the same patterns as graphile-search-plugin (for tsvector columns). + */ + +import 'graphile-build'; +import 'graphile-build-pg'; +import { TYPES } from '@dataplan/pg'; +import type { GraphileConfig } from 'graphile-config'; +import type { VectorSearchPluginOptions } from './types'; + +/** + * pgvector distance operators. 
+ * - <=> : Cosine distance + * - <-> : L2 (Euclidean) distance + * - <#> : Negative inner product + */ +const METRIC_OPERATORS: Record = { + COSINE: '<=>', + L2: '<->', + IP: '<#>', +}; + +function isVectorCodec(codec: any): boolean { + return codec?.name === 'vector'; +} + +/** + * Navigates from a PgSelectSingleStep up to the PgSelectStep. + * Uses duck-typing to avoid dependency on exact class names across rc versions. + */ +function getPgSelectStep($someStep: any): any | null { + let $step = $someStep; + + if ($step && typeof $step.getClassStep === 'function') { + $step = $step.getClassStep(); + } + + if ($step && typeof $step.orderBy === 'function' && $step.id !== undefined) { + return $step; + } + + return null; +} + +/** + * WeakMap keyed by SQL alias object (shared reference between + * the queryBuilder proxy and PgSelectStep). + * + * Stores per-query vector search state so the distance field's lambda + * can read the computed distance value at execution time. + */ +interface VectorDistanceSlot { + /** Map of fieldName -> index into the select list */ + indices: Record; +} +const vectorDistanceSlots = new WeakMap(); + +/** + * Creates the vector search plugin with the given options. 
+ */ +export function createVectorSearchPlugin( + options: VectorSearchPluginOptions = {} +): GraphileConfig.Plugin { + const { + defaultMetric = 'COSINE', + maxLimit = 100, + conditionPrefix = 'vector', + } = options; + + return { + name: 'VectorSearchPlugin', + version: '1.0.0', + description: + 'Auto-discovers vector columns and adds search fields, conditions, and orderBy', + after: [ + 'VectorCodecPlugin', + 'PgAttributesPlugin', + ], + + schema: { + hooks: { + init(_, build) { + const { + graphql: { + GraphQLList, + GraphQLNonNull, + GraphQLFloat, + }, + } = build; + + // Register the VectorMetric enum type FIRST so it's available + // for VectorNearbyInput's fields resolver + build.registerEnumType( + 'VectorMetric', + {}, + () => ({ + description: 'Similarity metric for vector search', + values: { + COSINE: { + value: 'COSINE', + description: + 'Cosine distance (1 - cosine similarity). Range: 0 (identical) to 2 (opposite).', + }, + L2: { + value: 'L2', + description: + 'Euclidean (L2) distance. Range: 0 (identical) to infinity.', + }, + IP: { + value: 'IP', + description: + 'Negative inner product. Higher (less negative) = more similar.', + }, + }, + }), + 'VectorSearchPlugin registering VectorMetric enum' + ); + + // Register the VectorNearbyInput type for condition fields + build.registerInputObjectType( + 'VectorNearbyInput', + {}, + () => ({ + description: + 'Input for vector similarity search. Provide a query vector, optional metric, and optional max distance threshold.', + fields: () => { + const VectorMetricEnum = + build.getTypeByName('VectorMetric') as any; + + return { + vector: { + type: new GraphQLNonNull( + new GraphQLList(new GraphQLNonNull(GraphQLFloat)) + ), + description: 'Query vector for similarity search.', + }, + metric: { + type: VectorMetricEnum, + description: `Similarity metric to use (default: ${defaultMetric}).`, + }, + distance: { + type: GraphQLFloat, + description: + 'Maximum distance threshold. 
Only rows within this distance are returned.', + }, + }; + }, + }), + 'VectorSearchPlugin registering VectorNearbyInput type' + ); + + return _; + }, + + /** + * Add `Distance` computed fields to output types for tables + * that have vector columns. + */ + GraphQLObjectType_fields(fields, build, context) { + const { + sql, + inflection, + graphql: { GraphQLFloat }, + grafast: { constant, lambda }, + } = build; + const { + scope: { isPgClassType, pgCodec }, + fieldWithHooks, + } = context; + + if (!isPgClassType || !pgCodec?.attributes) { + return fields; + } + + let newFields = fields; + + for (const [attributeName, attribute] of Object.entries( + pgCodec.attributes as Record + )) { + if (!isVectorCodec(attribute.codec)) continue; + + const baseFieldName = inflection.attribute({ + codec: pgCodec as any, + attributeName, + }); + const fieldName = inflection.camelCase(`${baseFieldName}-distance`); + + newFields = build.extend( + newFields, + { + [fieldName]: fieldWithHooks( + { fieldName } as any, + () => ({ + description: `Vector distance when filtered by \`${baseFieldName}\` nearby condition. Returns null when no nearby condition is active.`, + type: GraphQLFloat, + plan($step: any) { + const $select = getPgSelectStep($step); + if (!$select) return constant(null); + + if ( + typeof $select.setInliningForbidden === 'function' + ) { + $select.setInliningForbidden(); + } + + // Initialise the WeakMap slot for this query + const alias = $select.alias; + if (!vectorDistanceSlots.has(alias)) { + vectorDistanceSlots.set(alias, { + indices: Object.create(null), + }); + } + + const capturedField = baseFieldName; + const capturedAlias = alias; + return lambda( + $step, + (row: any) => { + if (row == null) return null; + const slot = + vectorDistanceSlots.get(capturedAlias); + if ( + !slot || + slot.indices[capturedField] === undefined + ) + return null; + const rawValue = + row[slot.indices[capturedField]]; + return rawValue == null + ? 
null + : parseFloat(rawValue); + }, + true + ); + }, + }) + ), + }, + `VectorSearchPlugin adding distance field '${fieldName}' for '${attributeName}' on '${pgCodec.name}'` + ); + } + + return newFields; + }, + + /** + * Add orderBy enum values for vector distance: + * _DISTANCE_ASC and _DISTANCE_DESC + */ + GraphQLEnumType_values(values, build, context) { + const { inflection } = build; + const { + scope: { isPgRowSortEnum, pgCodec }, + } = context; + + if (!isPgRowSortEnum || !pgCodec?.attributes) { + return values; + } + + let newValues = values; + + for (const [attributeName, attribute] of Object.entries( + pgCodec.attributes as Record + )) { + if (!isVectorCodec(attribute.codec)) continue; + + const fieldName = inflection.attribute({ + codec: pgCodec as any, + attributeName, + }); + const metaKey = `vector_order_${fieldName}`; + const makePlan = + (direction: 'ASC' | 'DESC') => (step: any) => { + if (typeof step.setMeta === 'function') { + step.setMeta(metaKey, direction); + } + }; + + const ascName = inflection.constantCase( + `${attributeName}_distance_asc` + ); + const descName = inflection.constantCase( + `${attributeName}_distance_desc` + ); + + newValues = build.extend( + newValues, + { + [ascName]: { + extensions: { + grafast: { + apply: makePlan('ASC'), + }, + }, + }, + [descName]: { + extensions: { + grafast: { + apply: makePlan('DESC'), + }, + }, + }, + }, + `VectorSearchPlugin adding distance orderBy for '${attributeName}' on '${pgCodec.name}'` + ); + } + + return newValues; + }, + + /** + * Add `Nearby` condition fields on connection condition input types + * for tables with vector columns. 
+ */ + GraphQLInputObjectType_fields(fields, build, context) { + const { inflection, sql } = build; + const { + scope: { isPgCondition, pgCodec }, + fieldWithHooks, + } = context; + + if ( + !isPgCondition || + !pgCodec || + !pgCodec.attributes || + pgCodec.isAnonymous + ) { + return fields; + } + + const vectorAttributes = Object.entries( + pgCodec.attributes as Record + ).filter(([_name, attr]: [string, any]) => + isVectorCodec(attr.codec) + ); + + if (vectorAttributes.length === 0) { + return fields; + } + + let newFields = fields; + + for (const [attributeName] of vectorAttributes) { + const fieldName = inflection.camelCase( + `${conditionPrefix}_${attributeName}` + ); + const baseFieldName = inflection.attribute({ + codec: pgCodec as any, + attributeName, + }); + + newFields = build.extend( + newFields, + { + [fieldName]: fieldWithHooks( + { + fieldName, + isPgConnectionConditionInputField: true, + }, + { + description: build.wrapDescription( + `Vector similarity search on the \`${attributeName}\` column. ` + + `Provide a query vector to filter and compute distance. 
` + + `Optionally specify a metric (COSINE, L2, IP) and maximum distance threshold.`, + 'field' + ), + type: build.getTypeByName( + 'VectorNearbyInput' + ) as any, + apply: function plan( + $condition: any, + val: any + ) { + if (val == null) return; + + const { vector, metric, distance } = val; + if ( + !vector || + !Array.isArray(vector) || + vector.length === 0 + ) + return; + + const resolvedMetric = metric || defaultMetric; + const operator = + METRIC_OPERATORS[resolvedMetric] || + METRIC_OPERATORS.COSINE; + const vectorString = `[${vector.join(',')}]`; + + const columnExpr = sql`${$condition.alias}.${sql.identifier(attributeName)}`; + const vectorExpr = sql`${sql.value(vectorString)}::vector`; + const distanceExpr = sql`(${columnExpr} ${sql.raw(operator)} ${vectorExpr})`; + + // If a distance threshold is provided, add WHERE clause + if ( + distance !== undefined && + distance !== null + ) { + $condition.where( + sql`${distanceExpr} <= ${sql.value(distance)}` + ); + } + + // Add distance to the SELECT list + const $parent = + $condition.dangerouslyGetParent(); + if ( + typeof $parent.selectAndReturnIndex === + 'function' + ) { + const wrappedDistanceSql = sql`${sql.parens(distanceExpr)}::text`; + const distanceIndex = + $parent.selectAndReturnIndex( + wrappedDistanceSql + ); + + // Store index in alias-keyed WeakMap + const slot = vectorDistanceSlots.get( + $condition.alias + ); + if (slot) { + slot.indices[baseFieldName] = + distanceIndex; + } + } + + // ORDER BY distance: only add when the user + // explicitly requested distance ordering via + // the EMBEDDING_DISTANCE_ASC/DESC enum values. + const metaKey = `vector_order_${baseFieldName}`; + const explicitDir = + typeof $parent.getMetaRaw === 'function' + ? 
$parent.getMetaRaw(metaKey) + : undefined; + if (explicitDir) { + $parent.orderBy({ + fragment: distanceExpr, + codec: TYPES.float, + direction: explicitDir, + }); + } + }, + } + ), + }, + `VectorSearchPlugin adding condition field '${fieldName}' for vector column '${attributeName}' on '${pgCodec.name}'` + ); + } + + return newFields; + }, + }, + }, + }; +} + +/** + * Creates a VectorSearchPlugin with the given options. + * This is the main entry point for using the plugin. + */ +export const VectorSearchPlugin = createVectorSearchPlugin; + +export default VectorSearchPlugin; diff --git a/graphile/graphile-settings/src/plugins/index.ts b/graphile/graphile-settings/src/plugins/index.ts index bc41f4fdf..3900d4c66 100644 --- a/graphile/graphile-settings/src/plugins/index.ts +++ b/graphile/graphile-settings/src/plugins/index.ts @@ -36,8 +36,9 @@ export { } from 'graphile-misc-plugins'; export type { UniqueLookupOptions, TypeMapping, PublicKeyChallengeConfig } from 'graphile-misc-plugins'; -// pgvector — Vector scalar + codec -export { VectorCodecPlugin, VectorCodecPreset } from 'graphile-pgvector-plugin'; +// pgvector — Vector scalar + codec + auto-discovered search/filter/orderBy +export { VectorCodecPlugin, VectorCodecPreset, VectorSearchPlugin, createVectorSearchPlugin } from 'graphile-pgvector-plugin'; +export type { VectorSearchPluginOptions, VectorMetric } from 'graphile-pgvector-plugin'; // Search plugin (stays in graphile-search-plugin, re-exported here for convenience) export { diff --git a/graphile/graphile-settings/src/presets/constructive-preset.ts b/graphile/graphile-settings/src/presets/constructive-preset.ts index 614e85ee9..570592ca2 100644 --- a/graphile/graphile-settings/src/presets/constructive-preset.ts +++ b/graphile/graphile-settings/src/presets/constructive-preset.ts @@ -13,7 +13,7 @@ import { } from 'graphile-misc-plugins'; import { PgSearchPreset } from 'graphile-search-plugin'; import { GraphilePostgisPreset } from 'graphile-postgis'; -import 
{ VectorCodecPreset } from 'graphile-pgvector-plugin'; +import { VectorCodecPreset, createVectorSearchPlugin } from 'graphile-pgvector-plugin'; import { Bm25SearchPreset } from 'graphile-pg-textsearch-plugin'; import { PostgisConnectionFilterPreset } from 'graphile-plugin-connection-filter-postgis'; import { UploadPreset } from 'graphile-upload-plugin'; @@ -40,8 +40,10 @@ import { constructiveUploadFieldDefinitions } from '../upload-resolver'; * - Upload plugin (file upload to S3/MinIO for image, upload, attachment domain columns) * - SQL expression validator (validates @sqlExpression columns in mutations) * - PG type mappings (maps custom types like email, url to GraphQL scalars) + * - pgvector search (auto-discovers vector columns: condition fields, distance computed fields, + * orderBy distance — zero config) * - pg_textsearch BM25 search (auto-discovers BM25 indexes: condition fields, score computed fields, - * orderBy score, connection filter bm25Matches operator — zero config) + * orderBy score — zero config) * * DISABLED PLUGINS: * - PgConnectionArgFilterBackwardRelationsPlugin (relation filters bloat the API) @@ -77,6 +79,9 @@ export const ConstructivePreset: GraphileConfig.Preset = { PgSearchPreset({ pgSearchPrefix: 'fullText' }), GraphilePostgisPreset, VectorCodecPreset, + { + plugins: [createVectorSearchPlugin()], + }, Bm25SearchPreset(), PostgisConnectionFilterPreset, UploadPreset({ diff --git a/graphile/postgraphile-plugin-pgvector/CHANGELOG.md b/graphile/postgraphile-plugin-pgvector/CHANGELOG.md deleted file mode 100644 index 22e2da72e..000000000 --- a/graphile/postgraphile-plugin-pgvector/CHANGELOG.md +++ /dev/null @@ -1,52 +0,0 @@ -# Change Log - -All notable changes to this project will be documented in this file. -See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. 
- -# [2.3.0](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.2.5...postgraphile-plugin-pgvector@2.3.0) (2026-03-01) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.2.5](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.2.4...postgraphile-plugin-pgvector@2.2.5) (2026-03-01) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.2.4](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.2.3...postgraphile-plugin-pgvector@2.2.4) (2026-02-28) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.2.3](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.2.2...postgraphile-plugin-pgvector@2.2.3) (2026-02-28) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.2.2](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.2.1...postgraphile-plugin-pgvector@2.2.2) (2026-02-26) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.2.1](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.2.0...postgraphile-plugin-pgvector@2.2.1) (2026-02-25) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -# [2.2.0](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.1.2...postgraphile-plugin-pgvector@2.2.0) (2026-02-24) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.1.2](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.1.1...postgraphile-plugin-pgvector@2.1.2) (2026-02-24) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.1.1](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.1.0...postgraphile-plugin-pgvector@2.1.1) (2026-02-19) - -**Note:** Version 
bump only for package postgraphile-plugin-pgvector - -# [2.1.0](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.0.1...postgraphile-plugin-pgvector@2.1.0) (2026-02-19) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -## [2.0.1](https://github.com/constructive-io/constructive/compare/postgraphile-plugin-pgvector@2.0.0...postgraphile-plugin-pgvector@2.0.1) (2026-02-15) - -**Note:** Version bump only for package postgraphile-plugin-pgvector - -# 2.0.0 (2026-02-13) - -**Note:** Version bump only for package postgraphile-plugin-pgvector diff --git a/graphile/postgraphile-plugin-pgvector/README.md b/graphile/postgraphile-plugin-pgvector/README.md deleted file mode 100644 index b3f66df6b..000000000 --- a/graphile/postgraphile-plugin-pgvector/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# postgraphile-plugin-pgvector - -PostGraphile v5 plugin for pgvector similarity search, enabling vector-based queries in your GraphQL API. - -## Features - -- **Vector Similarity Search**: Query your data by vector similarity using pgvector -- **Multiple Distance Metrics**: Support for COSINE, L2 (Euclidean), and IP (Inner Product) metrics -- **Configurable Collections**: Define multiple vector search endpoints for different tables -- **Pagination**: Built-in support for limit and offset parameters - -## Installation - -```bash -pnpm add postgraphile-plugin-pgvector -``` - -## Prerequisites - -- PostgreSQL with pgvector extension installed -- PostGraphile v5 - -## Usage - -```typescript -import { PgVectorPreset } from 'postgraphile-plugin-pgvector'; - -const preset = { - extends: [ - ConstructivePreset, - PgVectorPreset({ - collections: [{ - schema: 'public', - table: 'documents', - embeddingColumn: 'embedding', - graphqlFieldName: 'vectorSearchDocument', - }], - defaultMetric: 'COSINE', - maxLimit: 100, - }), - ], -}; -``` - -## GraphQL Query Example - -```graphql -query { - vectorSearchDocument(query: [0.1, 0.2, 0.3], limit: 
10, metric: COSINE) { - id - title - distance - } -} -``` - -## License - -MIT diff --git a/graphile/postgraphile-plugin-pgvector/jest.config.js b/graphile/postgraphile-plugin-pgvector/jest.config.js deleted file mode 100644 index eecd07335..000000000 --- a/graphile/postgraphile-plugin-pgvector/jest.config.js +++ /dev/null @@ -1,18 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', - transform: { - '^.+\\.tsx?$': [ - 'ts-jest', - { - babelConfig: false, - tsconfig: 'tsconfig.json' - } - ] - }, - transformIgnorePatterns: [`/node_modules/*`], - testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', - moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], - modulePathIgnorePatterns: ['dist/*'] -}; diff --git a/graphile/postgraphile-plugin-pgvector/package.json b/graphile/postgraphile-plugin-pgvector/package.json deleted file mode 100644 index bc0a9038b..000000000 --- a/graphile/postgraphile-plugin-pgvector/package.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "name": "postgraphile-plugin-pgvector", - "version": "2.3.0", - "author": "Constructive ", - "description": "PostGraphile v5 plugin for pgvector similarity search", - "main": "index.js", - "module": "esm/index.js", - "types": "index.d.ts", - "homepage": "https://github.com/constructive-io/constructive", - "license": "MIT", - "publishConfig": { - "access": "public", - "directory": "dist" - }, - "repository": { - "type": "git", - "url": "https://github.com/constructive-io/constructive" - }, - "bugs": { - "url": "https://github.com/constructive-io/constructive/issues" - }, - "scripts": { - "clean": "makage clean", - "prepack": "npm run build", - "build": "makage build", - "build:dev": "makage build --dev", - "lint": "eslint . 
--fix", - "test": "jest", - "test:watch": "jest --watch" - }, - "devDependencies": { - "@types/node": "^22.19.1", - "@types/pg": "^8.16.0", - "graphile-settings": "workspace:^", - "graphile-test": "workspace:^", - "makage": "^0.1.10", - "pg": "^8.17.1", - "pgsql-test": "workspace:^" - }, - "dependencies": { - "grafast": "1.0.0-rc.7", - "graphile-build": "5.0.0-rc.4", - "graphile-build-pg": "5.0.0-rc.5", - "graphile-config": "1.0.0-rc.5", - "pg-sql2": "5.0.0-rc.4" - }, - "peerDependencies": { - "graphql": "^16.9.0", - "postgraphile": "5.0.0-rc.7" - }, - "keywords": [ - "postgraphile", - "graphql", - "postgresql", - "pgvector", - "vector", - "similarity", - "embeddings", - "ai", - "constructive" - ] -} diff --git a/graphile/postgraphile-plugin-pgvector/src/__tests__/pgvector.test.ts b/graphile/postgraphile-plugin-pgvector/src/__tests__/pgvector.test.ts deleted file mode 100644 index e10ac07ca..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/__tests__/pgvector.test.ts +++ /dev/null @@ -1,255 +0,0 @@ -import { join } from 'path'; -import { getConnections, seed } from 'graphile-test'; -import type { GraphQLResponse } from 'graphile-test'; -import type { PgTestClient } from 'pgsql-test'; -import { PgVectorPreset } from '../preset'; -import { ConstructivePreset } from 'graphile-settings'; - -interface VectorSearchResult { - vectorSearchDocument: Array<{ - id: number; - title: string; - content: string | null; - distance: number; - }>; -} - -type QueryFn = ( - query: string, - variables?: Record -) => Promise>; - -describe('PgVectorPlugin', () => { - let db: PgTestClient; - let teardown: () => Promise; - let query: QueryFn; - - beforeAll(async () => { - const testPreset = { - extends: [ - ConstructivePreset, - PgVectorPreset({ - collections: [{ - schema: 'pgvector_test', - table: 'documents', - embeddingColumn: 'embedding', - graphqlFieldName: 'vectorSearchDocument', - }], - defaultMetric: 'COSINE', - maxLimit: 100, - }), - ], - }; - - const connections = await 
getConnections({ - schemas: ['pgvector_test'], - preset: testPreset, - useRoot: true, - }, [ - seed.sqlfile([join(__dirname, './setup.sql')]) - ]); - - db = connections.db; - teardown = connections.teardown; - query = connections.query; - - // Start a transaction for savepoint-based test isolation - await db.client.query('BEGIN'); - }); - - afterAll(async () => { - // Rollback the transaction - if (db) { - try { - await db.client.query('ROLLBACK'); - } catch { - // Ignore rollback errors - } - } - - if (teardown) { - await teardown(); - } - }); - - beforeEach(async () => { - await db.beforeEach(); - }); - - afterEach(async () => { - await db.afterEach(); - }); - - describe('basic vector search', () => { - it('returns results ordered by distance', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 5) { - id - title - distance - } - } - `); - - expect(result.errors).toBeUndefined(); - expect(result.data?.vectorSearchDocument).toBeDefined(); - expect(result.data?.vectorSearchDocument.length).toBeGreaterThan(0); - - // Document A has embedding [1, 0, 0], should be closest to query [1, 0, 0] - const firstResult = result.data?.vectorSearchDocument[0]; - expect(firstResult?.title).toBe('Document A'); - expect(firstResult?.distance).toBeCloseTo(0, 5); - }); - - it('respects limit parameter', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 2) { - id - title - } - } - `); - - expect(result.errors).toBeUndefined(); - expect(result.data?.vectorSearchDocument).toHaveLength(2); - }); - - it('respects offset parameter', async () => { - const resultNoOffset = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 5) { - id - title - } - } - `); - - const resultWithOffset = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 5, offset: 1) { - id - title - } - } - `); - - expect(resultNoOffset.errors).toBeUndefined(); - 
expect(resultWithOffset.errors).toBeUndefined(); - - // First result with offset should match second result without offset - expect(resultWithOffset.data?.vectorSearchDocument[0]?.title) - .toBe(resultNoOffset.data?.vectorSearchDocument[1]?.title); - }); - }); - - describe('similarity metrics', () => { - it('uses COSINE metric by default', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 1) { - title - distance - } - } - `); - - expect(result.errors).toBeUndefined(); - // Cosine distance of identical vectors is 0 - expect(result.data?.vectorSearchDocument[0]?.distance).toBeCloseTo(0, 5); - }); - - it('supports L2 (Euclidean) metric', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 1, metric: L2) { - title - distance - } - } - `); - - expect(result.errors).toBeUndefined(); - // L2 distance of identical vectors is 0 - expect(result.data?.vectorSearchDocument[0]?.distance).toBeCloseTo(0, 5); - }); - - it('supports IP (inner product) metric', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 1, metric: IP) { - title - distance - } - } - `); - - expect(result.errors).toBeUndefined(); - // Inner product of [1,0,0] with itself is 1, but pgvector returns negative inner product - // so the distance should be -1 - expect(result.data?.vectorSearchDocument[0]?.distance).toBeCloseTo(-1, 5); - }); - - it('returns different distances for different metrics', async () => { - const cosineResult = await query(` - query { - vectorSearchDocument(query: [0.5, 0.5, 0], limit: 1, metric: COSINE) { - title - distance - } - } - `); - - const l2Result = await query(` - query { - vectorSearchDocument(query: [0.5, 0.5, 0], limit: 1, metric: L2) { - title - distance - } - } - `); - - expect(cosineResult.errors).toBeUndefined(); - expect(l2Result.errors).toBeUndefined(); - - // The distances should be different for different metrics - 
// Document D [0.707, 0.707, 0] should be closest for both metrics - expect(cosineResult.data?.vectorSearchDocument[0]?.title).toBe('Document D'); - expect(l2Result.data?.vectorSearchDocument[0]?.title).toBe('Document D'); - }); - }); - - describe('edge cases', () => { - it('returns empty array when no results match', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [1, 0, 0], limit: 5, offset: 100) { - id - title - } - } - `); - - expect(result.errors).toBeUndefined(); - expect(result.data?.vectorSearchDocument).toHaveLength(0); - }); - - it('handles zero vector query', async () => { - const result = await query(` - query { - vectorSearchDocument(query: [0, 0, 0], limit: 5, metric: L2) { - id - title - distance - } - } - `); - - // Zero vector with L2 metric should return results - expect(result.errors).toBeUndefined(); - expect(result.data?.vectorSearchDocument).toBeDefined(); - expect(result.data?.vectorSearchDocument.length).toBeGreaterThan(0); - }); - }); -}); diff --git a/graphile/postgraphile-plugin-pgvector/src/__tests__/setup.sql b/graphile/postgraphile-plugin-pgvector/src/__tests__/setup.sql deleted file mode 100644 index 4165a79b3..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/__tests__/setup.sql +++ /dev/null @@ -1,32 +0,0 @@ --- Test setup for pgvector plugin tests --- This creates the pgvector extension and a test table with vector embeddings - --- Enable pgvector extension -CREATE EXTENSION IF NOT EXISTS vector; - --- Create test schema -CREATE SCHEMA IF NOT EXISTS pgvector_test; - --- Create test documents table with vector column --- Using 3 dimensions for simplicity in tests -CREATE TABLE pgvector_test.documents ( - id SERIAL PRIMARY KEY, - title TEXT NOT NULL, - content TEXT, - embedding vector(3) NOT NULL, - created_at TIMESTAMPTZ DEFAULT NOW() -); - --- Insert test data with known vectors for predictable distance calculations --- Vector [1, 0, 0] - unit vector along x-axis -INSERT INTO 
pgvector_test.documents (title, content, embedding) VALUES - ('Document A', 'First test document', '[1, 0, 0]'), - ('Document B', 'Second test document', '[0, 1, 0]'), - ('Document C', 'Third test document', '[0, 0, 1]'), - ('Document D', 'Fourth test document', '[0.707, 0.707, 0]'), - ('Document E', 'Fifth test document', '[0.577, 0.577, 0.577]'); - --- Create an index for performance (optional but good practice) -CREATE INDEX idx_documents_embedding ON pgvector_test.documents -USING ivfflat (embedding vector_cosine_ops) -WITH (lists = 1); diff --git a/graphile/postgraphile-plugin-pgvector/src/__tests__/teardown.sql b/graphile/postgraphile-plugin-pgvector/src/__tests__/teardown.sql deleted file mode 100644 index e237974bf..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/__tests__/teardown.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Teardown for pgvector plugin tests --- Clean up test schema and data - -DROP SCHEMA IF EXISTS pgvector_test CASCADE; diff --git a/graphile/postgraphile-plugin-pgvector/src/index.ts b/graphile/postgraphile-plugin-pgvector/src/index.ts deleted file mode 100644 index e16dd1268..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/index.ts +++ /dev/null @@ -1,52 +0,0 @@ -/** - * PostGraphile v5 pgvector Plugin - * - * Provides vector similarity search capabilities using pgvector. 
- * - * @example - * ```typescript - * import { PgVectorPlugin, PgVectorPreset } from 'postgraphile-plugin-pgvector'; - * - * // Option 1: Use the preset (recommended) - * const preset = { - * extends: [ - * PgVectorPreset({ - * collections: [{ - * schema: 'public', - * table: 'documents', - * embeddingColumn: 'embedding', - * }], - * }), - * ], - * }; - * - * // Option 2: Use the plugin directly - * const plugin = PgVectorPlugin({ - * collections: [{ - * schema: 'public', - * table: 'documents', - * embeddingColumn: 'embedding', - * }], - * defaultMetric: 'COSINE', - * maxLimit: 100, - * }); - * ``` - */ - -export { PgVectorPlugin, createPgVectorPlugin } from './plugin'; -export { PgVectorPreset } from './preset'; -export { - METRIC_OPERATORS, - buildVectorSearchQuery, - buildVectorSearchQueryWithWhere, - buildDistanceExpression, - formatVectorString, - validateQueryVector, - clampLimit, -} from './sql'; -export type { - VectorMetric, - VectorCollectionConfig, - PgVectorPluginOptions, - VectorSearchResult, -} from './types'; diff --git a/graphile/postgraphile-plugin-pgvector/src/plugin.ts b/graphile/postgraphile-plugin-pgvector/src/plugin.ts deleted file mode 100644 index 9ed6ab15f..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/plugin.ts +++ /dev/null @@ -1,320 +0,0 @@ -/** - * PostGraphile v5 pgvector Plugin - * - * Adds vector similarity search capabilities to PostGraphile using pgvector. - * Uses the graphile-build hooks API to extend the schema with vector search fields. - * Uses Grafast's step-based API for proper v5 compatibility. 
- */ - -import 'graphile-build'; -import type { GraphileConfig } from 'graphile-config'; -import { - GraphQLObjectType, - GraphQLList, - GraphQLNonNull, - GraphQLString, - GraphQLInt, - GraphQLFloat, - GraphQLEnumType, - GraphQLBoolean, -} from 'graphql'; -import { lambda, context as grafastContext, object, type Step } from 'grafast'; -import type { PgVectorPluginOptions, VectorCollectionConfig, VectorMetric } from './types'; -import { - buildVectorSearchQuery, - formatVectorString, - validateQueryVector, - clampLimit, - compileSql, -} from './sql'; - -declare module 'graphile-config' { - interface GrafastOptions { - pgVectorOptions?: PgVectorPluginOptions; - } -} - -const DEFAULT_METRIC: VectorMetric = 'COSINE'; -const DEFAULT_MAX_LIMIT = 100; - -interface VectorSearchResultRow { - distance: number; - [key: string]: unknown; -} - -/** - * Creates the pgvector plugin using graphile-build hooks - */ -export function createPgVectorPlugin(options: PgVectorPluginOptions): GraphileConfig.Plugin { - const { - collections, - defaultMetric = DEFAULT_METRIC, - maxLimit = DEFAULT_MAX_LIMIT, - } = options; - - return { - name: 'PgVectorPlugin', - version: '1.0.0', - description: 'Adds pgvector similarity search capabilities to PostGraphile', - - schema: { - hooks: { - init(_: any, build: any) { - const { pgRegistry } = build.input; - - for (const collection of collections) { - const resourceKey = `${collection.schema}.${collection.table}`; - let foundResource = null; - - for (const [_key, resource] of Object.entries(pgRegistry.pgResources) as [string, any][]) { - if (!resource.codec?.attributes || resource.codec?.isAnonymous) continue; - - const pgExtensions = resource.codec?.extensions?.pg as { schemaName?: string; name?: string } | undefined; - const schemaName = pgExtensions?.schemaName; - const tableName = pgExtensions?.name || resource.codec?.name; - - if (schemaName === collection.schema && tableName === collection.table) { - foundResource = resource; - break; - } - } - 
- if (!foundResource) { - console.warn( - `[PgVectorPlugin] Warning: Could not find resource for ${resourceKey}. ` + - `Make sure the table exists and is included in your PostGraphile schemas.` - ); - } - } - - return _; - }, - - GraphQLObjectType_fields(fields: any, build: any, context: any) { - const { Self } = context; - - if (Self.name !== 'Query') { - return fields; - } - - const { pgRegistry } = build.input; - const inflection = build.inflection; - - const VectorMetricEnum = new GraphQLEnumType({ - name: 'VectorMetric', - description: 'Similarity metric for vector search', - values: { - COSINE: { - value: 'COSINE', - description: 'Cosine distance (1 - cosine similarity). Range: 0 (identical) to 2 (opposite).', - }, - L2: { - value: 'L2', - description: 'Euclidean (L2) distance. Range: 0 (identical) to infinity.', - }, - IP: { - value: 'IP', - description: 'Negative inner product. Higher (less negative) values indicate more similarity.', - }, - }, - }); - - const newFields: typeof fields = { ...fields }; - - for (const collection of collections) { - let foundResource: any = null; - - for (const resource of Object.values(pgRegistry.pgResources) as any[]) { - if (!resource.codec?.attributes || resource.codec?.isAnonymous) continue; - - const pgExtensions = resource.codec?.extensions?.pg as { schemaName?: string; name?: string } | undefined; - const schemaName = pgExtensions?.schemaName; - const tableName = pgExtensions?.name || resource.codec?.name; - - if (schemaName === collection.schema && tableName === collection.table) { - foundResource = resource; - break; - } - } - - if (!foundResource) { - continue; - } - - const codec = foundResource.codec; - const tableType = inflection.tableType(codec); - const fieldName = collection.graphqlFieldName || `vectorSearch${tableType}`; - - const VectorSearchResultType = new GraphQLObjectType({ - name: `${tableType}VectorSearchResult`, - description: `Vector search result for ${tableType}`, - fields: () => { - const 
resultFields: Record = { - distance: { - type: new GraphQLNonNull(GraphQLFloat), - description: 'Distance/similarity score. Interpretation depends on the metric used.', - }, - }; - - for (const [attrName, attr] of Object.entries(codec.attributes) as [string, any][]) { - const gqlType = mapPgTypeToGraphQL(attr.codec?.name, attr.notNull); - if (gqlType) { - resultFields[attrName] = { - type: gqlType, - }; - } - } - - return resultFields; - }, - }); - - const vectorSearchExecutor = createVectorSearchExecutor(collection, defaultMetric, maxLimit); - - newFields[fieldName] = { - type: new GraphQLNonNull(new GraphQLList(new GraphQLNonNull(VectorSearchResultType))), - description: `Search ${tableType} by vector similarity using pgvector`, - args: { - query: { - type: new GraphQLNonNull(new GraphQLList(new GraphQLNonNull(GraphQLFloat))), - description: 'Query vector for similarity search', - }, - limit: { - type: GraphQLInt, - description: `Maximum number of results to return (default: 10, max: ${maxLimit})`, - }, - offset: { - type: GraphQLInt, - description: 'Number of results to skip (default: 0)', - }, - metric: { - type: VectorMetricEnum, - description: `Similarity metric to use (default: ${defaultMetric})`, - }, - }, - extensions: { - grafast: { - plan(_$root: Step, fieldArgs: any) { - const $query = fieldArgs.getRaw('query'); - const $limit = fieldArgs.getRaw('limit'); - const $offset = fieldArgs.getRaw('offset'); - const $metric = fieldArgs.getRaw('metric'); - const $withPgClient = (grafastContext() as any).get('withPgClient'); - const $combined = object({ - query: $query, - limit: $limit, - offset: $offset, - metric: $metric, - withPgClient: $withPgClient, - }); - return lambda($combined, vectorSearchExecutor as any); - }, - }, - }, - }; - } - - return newFields; - }, - }, - }, - }; -} - -function createVectorSearchExecutor( - collection: VectorCollectionConfig, - defaultMetric: VectorMetric, - maxLimit: number -) { - return async (args: { - query: number[]; - 
limit?: number; - offset?: number; - metric?: VectorMetric; - withPgClient?: (pgSettings: any, callback: (client: any) => Promise) => Promise; - }): Promise => { - const { query, limit = 10, offset = 0, metric = defaultMetric, withPgClient } = args; - - validateQueryVector(query, collection.maxQueryDim); - - const clampedLimit = clampLimit(limit, maxLimit); - const vectorString = formatVectorString(query); - - const sqlQuery = buildVectorSearchQuery( - collection.schema, - collection.table, - collection.embeddingColumn, - metric - ); - - const compiled = compileSql(sqlQuery); - const queryText = compiled.text; - const queryValues = [vectorString, clampedLimit, offset]; - - let result; - - if (withPgClient) { - result = await withPgClient(null, async (client: any) => { - return client.query(queryText, queryValues); - }); - } else { - throw new Error( - '[PgVectorPlugin] No database client available in context. ' + - 'Make sure you are using PostGraphile with a proper database connection.' - ); - } - - return result.rows; - }; -} - -function mapPgTypeToGraphQL(pgType: string | undefined, notNull: boolean): any { - if (!pgType) return null; - - let baseType; - switch (pgType) { - case 'int2': - case 'int4': - case 'int8': - case 'integer': - case 'bigint': - case 'smallint': - baseType = GraphQLInt; - break; - case 'float4': - case 'float8': - case 'real': - case 'double precision': - case 'numeric': - case 'decimal': - baseType = GraphQLFloat; - break; - case 'bool': - case 'boolean': - baseType = GraphQLBoolean; - break; - case 'text': - case 'varchar': - case 'char': - case 'uuid': - case 'timestamptz': - case 'timestamp': - case 'date': - case 'time': - case 'json': - case 'jsonb': - default: - baseType = GraphQLString; - break; - } - - return notNull ? new GraphQLNonNull(baseType) : baseType; -} - -/** - * Creates a PgVectorPlugin with the given options. - * This is the main entry point for using the plugin. 
- */ -export const PgVectorPlugin = createPgVectorPlugin; - -export default PgVectorPlugin; diff --git a/graphile/postgraphile-plugin-pgvector/src/preset.ts b/graphile/postgraphile-plugin-pgvector/src/preset.ts deleted file mode 100644 index 8b8ccb912..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/preset.ts +++ /dev/null @@ -1,39 +0,0 @@ -/** - * PostGraphile v5 pgvector Preset - * - * Provides a convenient preset for including pgvector support in PostGraphile. - */ - -import type { GraphileConfig } from 'graphile-config'; -import type { PgVectorPluginOptions } from './types'; -import { createPgVectorPlugin } from './plugin'; - -/** - * Creates a preset that includes the pgvector plugin with the given options. - * - * @example - * ```typescript - * import { PgVectorPreset } from 'postgraphile-plugin-pgvector'; - * - * const preset = { - * extends: [ - * PgVectorPreset({ - * collections: [{ - * schema: 'public', - * table: 'documents', - * embeddingColumn: 'embedding', - * }], - * defaultMetric: 'COSINE', - * maxLimit: 100, - * }), - * ], - * }; - * ``` - */ -export function PgVectorPreset(options: PgVectorPluginOptions): GraphileConfig.Preset { - return { - plugins: [createPgVectorPlugin(options)], - }; -} - -export default PgVectorPreset; diff --git a/graphile/postgraphile-plugin-pgvector/src/sql.ts b/graphile/postgraphile-plugin-pgvector/src/sql.ts deleted file mode 100644 index b1b80a944..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/sql.ts +++ /dev/null @@ -1,179 +0,0 @@ -/** - * SQL Query Builder for pgvector - * - * Provides safe, parameterized SQL generation for vector similarity searches. - * All user input is properly escaped using pg-sql2. - */ - -import { sql, type SQL, compile as compileSql } from 'pg-sql2'; -import type { VectorMetric } from './types'; - -export { compileSql }; - -/** - * Maps VectorMetric enum values to pgvector operators. 
- * - * pgvector operators: - * - <-> : L2 (Euclidean) distance - * - <#> : Negative inner product (for ASC ordering) - * - <=> : Cosine distance - */ -export const METRIC_OPERATORS: Record = { - L2: '<->', - IP: '<#>', - COSINE: '<=>', -}; - -/** - * Builds a safe SQL fragment for the distance operator expression. - * - * @param columnAlias - SQL alias for the table - * @param columnName - Name of the embedding column - * @param metric - The similarity metric to use - * @returns SQL fragment for the distance calculation - */ -export function buildDistanceExpression( - columnAlias: SQL, - columnName: string, - metric: VectorMetric -): SQL { - const operator = METRIC_OPERATORS[metric]; - // Use sql.raw for the operator since it's from our controlled mapping - return sql`${columnAlias}.${sql.identifier(columnName)} ${sql.raw(operator)} $1::vector`; -} - -/** - * Builds a complete vector search query. - * - * The query: - * 1. Selects all columns from the table plus the distance - * 2. Orders by distance (ascending for all metrics) - * 3. 
Applies LIMIT and OFFSET - * - * @param schema - PostgreSQL schema name - * @param table - PostgreSQL table name - * @param embeddingColumn - Name of the vector column - * @param metric - Similarity metric to use - * @returns SQL query template (parameters: $1=vector, $2=limit, $3=offset) - */ -export function buildVectorSearchQuery( - schema: string, - table: string, - embeddingColumn: string, - metric: VectorMetric -): SQL { - const tableRef = sql.identifier(schema, table); - const alias = sql.identifier('t'); - const operator = METRIC_OPERATORS[metric]; - - // Build the query with parameterized values - // $1 = query vector (cast to vector type) - // $2 = limit - // $3 = offset - return sql` - SELECT ${alias}.*, - (${alias}.${sql.identifier(embeddingColumn)} ${sql.raw(operator)} $1::vector) AS distance - FROM ${tableRef} ${alias} - ORDER BY ${alias}.${sql.identifier(embeddingColumn)} ${sql.raw(operator)} $1::vector - LIMIT $2 - OFFSET $3 - `; -} - -/** - * Builds a vector search query with an optional WHERE clause. 
- * - * @param schema - PostgreSQL schema name - * @param table - PostgreSQL table name - * @param embeddingColumn - Name of the vector column - * @param metric - Similarity metric to use - * @param whereClause - Optional SQL WHERE clause fragment - * @returns SQL query template - */ -export function buildVectorSearchQueryWithWhere( - schema: string, - table: string, - embeddingColumn: string, - metric: VectorMetric, - whereClause?: SQL -): SQL { - const tableRef = sql.identifier(schema, table); - const alias = sql.identifier('t'); - const operator = METRIC_OPERATORS[metric]; - - const baseQuery = sql` - SELECT ${alias}.*, - (${alias}.${sql.identifier(embeddingColumn)} ${sql.raw(operator)} $1::vector) AS distance - FROM ${tableRef} ${alias} - `; - - if (whereClause) { - return sql` - ${baseQuery} - WHERE ${whereClause} - ORDER BY ${alias}.${sql.identifier(embeddingColumn)} ${sql.raw(operator)} $1::vector - LIMIT $2 - OFFSET $3 - `; - } - - return sql` - ${baseQuery} - ORDER BY ${alias}.${sql.identifier(embeddingColumn)} ${sql.raw(operator)} $1::vector - LIMIT $2 - OFFSET $3 - `; -} - -/** - * Formats a JavaScript array of numbers as a pgvector string. - * - * @param vector - Array of numbers representing the vector - * @returns String in pgvector format: '[1.0,2.0,3.0]' - */ -export function formatVectorString(vector: number[]): string { - return `[${vector.join(',')}]`; -} - -/** - * Validates a query vector. 
- * - * @param vector - The vector to validate - * @param maxDim - Optional maximum dimension limit - * @throws Error if validation fails - */ -export function validateQueryVector( - vector: unknown, - maxDim?: number -): asserts vector is number[] { - if (!Array.isArray(vector)) { - throw new Error('Query vector must be an array'); - } - - if (vector.length === 0) { - throw new Error('Query vector cannot be empty'); - } - - for (let i = 0; i < vector.length; i++) { - if (typeof vector[i] !== 'number' || !Number.isFinite(vector[i])) { - throw new Error(`Query vector element at index ${i} must be a finite number`); - } - } - - if (maxDim !== undefined && vector.length > maxDim) { - throw new Error( - `Query vector dimension (${vector.length}) exceeds maximum allowed (${maxDim})` - ); - } -} - -/** - * Clamps a limit value to the maximum allowed. - * - * @param limit - Requested limit - * @param maxLimit - Maximum allowed limit - * @returns Clamped limit value - */ -export function clampLimit(limit: number, maxLimit: number): number { - return Math.min(Math.max(1, limit), maxLimit); -} diff --git a/graphile/postgraphile-plugin-pgvector/src/types.ts b/graphile/postgraphile-plugin-pgvector/src/types.ts deleted file mode 100644 index 3ce06e50e..000000000 --- a/graphile/postgraphile-plugin-pgvector/src/types.ts +++ /dev/null @@ -1,104 +0,0 @@ -/** - * pgvector Plugin Types - * - * Type definitions for the PostGraphile pgvector plugin configuration. - */ - -/** - * Supported vector similarity metrics. - * - COSINE: Cosine distance (1 - cosine similarity) - * - L2: Euclidean (L2) distance - * - IP: Inner product (negative, for ordering) - */ -export type VectorMetric = 'COSINE' | 'L2' | 'IP'; - -/** - * Configuration for a single vector search collection. - */ -export interface VectorCollectionConfig { - /** - * PostgreSQL schema name containing the table. - */ - schema: string; - - /** - * PostgreSQL table name. 
- */ - table: string; - - /** - * Name of the column containing the vector embedding. - * Must be of type `vector(n)` from pgvector. - */ - embeddingColumn: string; - - /** - * Primary key column name. - * If not provided, will attempt to infer from the table. - * @default inferred from table introspection - */ - primaryKey?: string; - - /** - * Custom GraphQL field name for the vector search query. - * If not provided, will be generated as `vectorSearch_`. - */ - graphqlFieldName?: string; - - /** - * Maximum allowed dimension for query vectors. - * Used for defensive validation to prevent mismatched dimensions. - */ - maxQueryDim?: number; -} - -/** - * Plugin configuration options. - */ -export interface PgVectorPluginOptions { - /** - * Array of collection configurations for vector search. - * Each collection maps to a table with a vector column. - */ - collections: VectorCollectionConfig[]; - - /** - * Default similarity metric to use when not specified in queries. - * @default 'COSINE' - */ - defaultMetric?: VectorMetric; - - /** - * Maximum limit for vector search results. - * Used to prevent excessive result sets. - * @default 100 - */ - maxLimit?: number; - - /** - * Whether to require RLS-safe queries. - * When true, queries use the same connection as other PostGraphile queries, - * ensuring Row Level Security policies are applied. - * @default true - */ - requireRlsSafe?: boolean; -} - -/** - * Result row from a vector search query. - */ -export interface VectorSearchResult> { - /** - * The row data from the table. - */ - row: T; - - /** - * The distance/similarity score. 
- * Interpretation depends on the metric used: - * - COSINE: 0 = identical, 2 = opposite - * - L2: 0 = identical, higher = more different - * - IP: Higher (less negative) = more similar - */ - distance: number; -} diff --git a/graphile/postgraphile-plugin-pgvector/tsconfig.esm.json b/graphile/postgraphile-plugin-pgvector/tsconfig.esm.json deleted file mode 100644 index f624f9670..000000000 --- a/graphile/postgraphile-plugin-pgvector/tsconfig.esm.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "extends": "./tsconfig.json", - "compilerOptions": { - "outDir": "dist/esm", - "module": "ESNext" - } -} diff --git a/graphile/postgraphile-plugin-pgvector/tsconfig.json b/graphile/postgraphile-plugin-pgvector/tsconfig.json deleted file mode 100644 index 9c8a7d7c1..000000000 --- a/graphile/postgraphile-plugin-pgvector/tsconfig.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "extends": "../../tsconfig.json", - "compilerOptions": { - "outDir": "dist", - "rootDir": "src" - }, - "include": ["src/**/*"] -} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 71d00a7b9..29eff921c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -311,6 +311,9 @@ importers: graphile/graphile-pgvector-plugin: dependencies: + '@dataplan/pg': + specifier: 1.0.0-rc.5 + version: 1.0.0-rc.5(@dataplan/json@1.0.0-rc.5(grafast@1.0.0-rc.7(graphql@16.12.0)))(grafast@1.0.0-rc.7(graphql@16.12.0))(graphile-config@1.0.0-rc.5)(graphql@16.12.0)(pg-sql2@5.0.0-rc.4)(pg@8.17.1) graphile-build: specifier: 5.0.0-rc.4 version: 5.0.0-rc.4(grafast@1.0.0-rc.7(graphql@16.12.0))(graphile-config@1.0.0-rc.5)(graphql@16.12.0) @@ -329,6 +332,9 @@ importers: postgraphile: specifier: 5.0.0-rc.7 version: 5.0.0-rc.7(31cdc8d134b08730595d424ed57d41b2) + postgraphile-plugin-connection-filter: + specifier: 3.0.0-rc.1 + version: 3.0.0-rc.1 devDependencies: '@types/node': specifier: ^22.19.1 @@ -773,53 +779,6 @@ importers: version: 0.1.12 publishDirectory: dist - graphile/postgraphile-plugin-pgvector: - dependencies: - grafast: - specifier: 1.0.0-rc.7 - 
version: 1.0.0-rc.7(graphql@16.12.0) - graphile-build: - specifier: 5.0.0-rc.4 - version: 5.0.0-rc.4(grafast@1.0.0-rc.7(graphql@16.12.0))(graphile-config@1.0.0-rc.5)(graphql@16.12.0) - graphile-build-pg: - specifier: 5.0.0-rc.5 - version: 5.0.0-rc.5(@dataplan/pg@1.0.0-rc.5(@dataplan/json@1.0.0-rc.5(grafast@1.0.0-rc.7(graphql@16.12.0)))(grafast@1.0.0-rc.7(graphql@16.12.0))(graphile-config@1.0.0-rc.5)(graphql@16.12.0)(pg-sql2@5.0.0-rc.4)(pg@8.17.1))(grafast@1.0.0-rc.7(graphql@16.12.0))(graphile-build@5.0.0-rc.4(grafast@1.0.0-rc.7(graphql@16.12.0))(graphile-config@1.0.0-rc.5)(graphql@16.12.0))(graphile-config@1.0.0-rc.5)(graphql@16.12.0)(pg-sql2@5.0.0-rc.4)(pg@8.17.1)(tamedevil@0.1.0-rc.4) - graphile-config: - specifier: 1.0.0-rc.5 - version: 1.0.0-rc.5 - graphql: - specifier: ^16.9.0 - version: 16.12.0 - pg-sql2: - specifier: 5.0.0-rc.4 - version: 5.0.0-rc.4 - postgraphile: - specifier: 5.0.0-rc.7 - version: 5.0.0-rc.7(31cdc8d134b08730595d424ed57d41b2) - devDependencies: - '@types/node': - specifier: ^22.19.1 - version: 22.19.11 - '@types/pg': - specifier: ^8.16.0 - version: 8.16.0 - graphile-settings: - specifier: workspace:^ - version: link:../graphile-settings/dist - graphile-test: - specifier: workspace:^ - version: link:../graphile-test/dist - makage: - specifier: ^0.1.10 - version: 0.1.12 - pg: - specifier: ^8.17.1 - version: 8.17.1 - pgsql-test: - specifier: workspace:^ - version: link:../../postgres/pgsql-test/dist - publishDirectory: dist - graphql/codegen: dependencies: '@0no-co/graphql.web':