From 81a144957cddc5ffaf0bed1f1e772c596168377e Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 11 Jul 2024 09:14:10 -0400 Subject: [PATCH 1/5] [Go] pgvector sample A sample demonstrating how to use Postgres's vector extension to build an indexer and retriever. Formerly #375. --- go/go.mod | 2 + go/go.sum | 4 + go/samples/pgvector/main.go | 218 +++++++++++++++++++++++++++++++ go/samples/pgvector/pgvector.sql | 22 ++++ 4 files changed, 246 insertions(+) create mode 100644 go/samples/pgvector/main.go create mode 100644 go/samples/pgvector/pgvector.sql diff --git a/go/go.mod b/go/go.mod index 60f1a33dcb..5ac1aae651 100644 --- a/go/go.mod +++ b/go/go.mod @@ -49,7 +49,9 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/googleapis/gax-go/v2 v2.12.4 // indirect github.com/kr/text v0.2.0 // indirect + github.com/lib/pq v1.10.9 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/pgvector/pgvector-go v0.2.0 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect go.opencensus.io v0.24.0 // indirect diff --git a/go/go.sum b/go/go.sum index ea68f9f6e6..fb742fe227 100644 --- a/go/go.sum +++ b/go/go.sum @@ -100,8 +100,12 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/pgvector/pgvector-go v0.2.0 h1:NZdW4NxUxdSCzaev3LVHb9ORf+LdX+uZOQVqQ6s2Zyg= +github.com/pgvector/pgvector-go v0.2.0/go.mod h1:OQpvU5QZGQOPI9quIXAyHaRZ5yGk/RGUDbs9C3DPUNE= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= diff --git a/go/samples/pgvector/main.go b/go/samples/pgvector/main.go new file mode 100644 index 0000000000..de3f6bc692 --- /dev/null +++ b/go/samples/pgvector/main.go @@ -0,0 +1,218 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This program shows how to use Postgres's pgvector extension with Genkit. + +// This program can be manually tested like so: +// +// In development mode (with the environment variable GENKIT_ENV="dev"): +// Start the server listening on port 3100: +// +// go run . -dbconn "$DBCONN" -apikey $API_KEY & +// +// Ask a question: +// +// curl -d '{"Show": "Best Friends", "Question": "Who does Alice love?"}' http://localhost:3400/askQuestion +package main + +import ( + "context" + "database/sql" + "errors" + "flag" + "fmt" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googleai" + _ "github.com/lib/pq" + pgv "github.com/pgvector/pgvector-go" +) + +var ( + connString = flag.String("dbconn", "", "database connection string") + apiKey = flag.String("apikey", "", "Gemini API key") + index = flag.Bool("index", false, "index the existing data") +) + +func main() { + flag.Parse() + if err := run(); err != nil { + log.Fatal(err) + } +} + +func run() error { + if *connString == "" { + return errors.New("need -dbconn") + } + if *apiKey == "" { + return errors.New("need -apikey") + } + ctx := context.Background() + if err := googleai.Init(ctx, &googleai.Config{APIKey: *apiKey}); err != nil { + return err + } + const embedderName = "embedding-001" + embedder := googleai.Embedder(embedderName) + if embedder == nil { + return fmt.Errorf("embedder %s is not known to the googleai plugin", embedderName) + } + + db, err := sql.Open("postgres", *connString) + if err != nil { + return err + } + defer db.Close() + + if *index { + indexer := defineIndexer(db, embedder) + if err := indexExistingRows(ctx, db, indexer); err != nil { + return err + } + } + + retriever := defineRetriever(db, embedder) + + type input struct { + Question string + Show string + } + + genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) { + res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{ + Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}}, + Options: in.Show, + }) + if err != nil { + return "", err + } + for _, doc := range res.Documents { + fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text) + } + // Use documents in RAG prompts. + return "", nil + }) + + return genkit.Init(ctx, nil) +} + +const provider = "pgvector" + +func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever { + f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { + eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}}) + if err != nil { + return nil, err + } + rows, err := db.QueryContext(ctx, ` + SELECT episode_id, season_number, chunk as content + FROM embeddings + WHERE show_id = $1 + ORDER BY embedding <#> $2 + LIMIT 2`, + req.Options, pgv.NewVector(eres.Embeddings[0].Embedding)) + if err != nil { + return nil, err + } + defer rows.Close() + + res := &ai.RetrieverResponse{} + for rows.Next() { + var eid, sn int + var content string + if err := rows.Scan(&eid, &sn, &content); err != nil { + return nil, err + } + meta := map[string]any{ + "episode_id": eid, + "season_number": sn, + } + doc := &ai.Document{ + Content: []*ai.Part{ai.NewTextPart(content)}, + Metadata: meta, + } + res.Documents = append(res.Documents, doc) + } + if err := rows.Err(); err != nil { + return nil, err + } + return res, nil + } + return ai.DefineRetriever(provider, "shows", f) +} + +func defineIndexer(db *sql.DB, embedder *ai.Embedder) *ai.Indexer { + // The indexer assumes that each Document has a single part, to be embedded, and metadata fields + // for the table primary key: show_id, season_number, episode_id. + const query = ` + UPDATE embeddings + SET embedding = $4 + WHERE show_id = $1 AND season_number = $2 AND episode_id = $3 + ` + return ai.DefineIndexer(provider, "shows", func(ctx context.Context, req *ai.IndexerRequest) error { + res, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: req.Documents}) + if err != nil { + return err + } + // You may want to use your database's batch functionality to insert the embeddings + // more efficiently. + for i, emb := range res.Embeddings { + doc := req.Documents[i] + args := make([]any, 4) + for j, k := range []string{"show_id", "season_number", "episode_id"} { + if a, ok := doc.Metadata[k]; ok { + args[j] = a + } else { + return fmt.Errorf("doc[%d]: missing metadata key %q", i, k) + } + } + args[3] = pgv.NewVector(emb.Embedding) + if _, err := db.ExecContext(ctx, query, args...); err != nil { + return err + } + } + return nil + }) +} + +func indexExistingRows(ctx context.Context, db *sql.DB, indexer *ai.Indexer) error { + rows, err := db.QueryContext(ctx, `SELECT show_id, season_number, episode_id, chunk FROM embeddings`) + if err != nil { + return err + } + defer rows.Close() + + req := &ai.IndexerRequest{} + for rows.Next() { + var sid, chunk string + var sn, eid int + if err := rows.Scan(&sid, &sn, &eid, &chunk); err != nil { + return err + } + req.Documents = append(req.Documents, &ai.Document{ + Content: []*ai.Part{ai.NewTextPart(chunk)}, + Metadata: map[string]any{ + "show_id": sid, + "season_number": sn, + "episode_id": eid, + }, + }) + } + if err := rows.Err(); err != nil { + return err + } + return indexer.Index(ctx, req) +} diff --git a/go/samples/pgvector/pgvector.sql b/go/samples/pgvector/pgvector.sql new file mode 100644 index 0000000000..a252e9e3d5 --- /dev/null +++ b/go/samples/pgvector/pgvector.sql @@ -0,0 +1,22 @@ +-- This SQL enables the vector extension and creates the table and data used +-- in the accompanying sample. + +CREATE EXTENSION IF NOT EXISTS vector; + +CREATE TABLE embeddings ( + show_id TEXT NOT NULL, + season_number INTEGER NOT NULL, + episode_id INTEGER NOT NULL, + chunk TEXT, + embedding vector(768), + PRIMARY KEY (show_id, season_number, episode_id) +); + +INSERT INTO embeddings (show_id, season_number, episode_id, chunk) VALUES + ('La Vie', 1, 1, 'Natasha confesses her love for Pierre.'), + ('La Vie', 1, 2, 'Pierre and Natasha become engaged.'), + ('La Vie', 1, 3, 'Margot and Henri divorce.'), + ('Best Friends', 1, 1, 'Alice confesses her love for Oscar.'), + ('Best Friends', 1, 2, 'Oscar and Alice become engaged.'), + ('Best Friends', 1, 3, 'Bob and Pat divorce.') +; From 2e3007ff2146f8b39d863cf05867b6758e2aee32 Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Thu, 11 Jul 2024 14:48:46 +0000 Subject: [PATCH 2/5] Update pgvector docs to include Go --- docs/templates/pgvector | 107 +++++++++++++++++ docs/templates/pgvector.md | 224 ++++++++++++++++++++++++++---------- go/samples/pgvector/main.go | 8 ++ 3 files changed, 277 insertions(+), 62 deletions(-) create mode 100644 docs/templates/pgvector diff --git a/docs/templates/pgvector b/docs/templates/pgvector new file mode 100644 index 0000000000..90a63965c1 --- /dev/null +++ b/docs/templates/pgvector @@ -0,0 +1,107 @@ +# pgvector retriever template + +You can use PostgreSQL and `pgvector` as your retriever implementation. Use the +following examples as a starting point and modify it to work with your database +schema. + +For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the +Postgres client, but you may use another client libaray of your choice. + +
+ +
+

Node.js (Typescript)

+

```js + import { embed } from '@genkit-ai/ai/embedder'; + import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever'; + import { defineFlow } from '@genkit-ai/flow'; + import { textEmbeddingGecko } from '@genkit-ai/vertexai'; + import { toSql } from 'pgvector'; + import postgres from 'postgres'; + import { z } from 'zod'; + + const sql = postgres({ ssl: false, database: 'recaps' }); + + const QueryOptions = z.object({ + show: z.string(), + k: z.number().optional(), + }); + + const sqlRetriever = defineRetriever( + { + name: 'pgvector-myTable', + configSchema: QueryOptions, + }, + async (input, options) => { + const embedding = await embed({ + embedder: textEmbeddingGecko, + content: input, + }); + const results = await sql + SELECT episode_id, season_number, chunk as content + FROM embeddings + WHERE show_id = ${options.show} + ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3} + ; + return { + documents: results.map((row) => { + const { content, ...metadata } = row; + return Document.fromText(content, metadata); + }), + }; + } + ); + ``` +

+
+
+

Go

+

+ %include ../go/samples/pgvector/main.go retr +

+
+ +
+
+ +And here's how to use the retriever in a flow: + +
+ +
+

Node.js (Typescript)

+

+ ```js + // Simple flow to use the sqlRetriever + export const askQuestionsOnGoT = defineFlow( + { + name: 'askQuestionsOnGoT', + inputSchema: z.string(), + outputSchema: z.string(), + }, + async (inputQuestion) => { + const docs = await retrieve({ + retriever: sqlRetriever, + query: inputQuestion, + options: { + show: 'Game of Thrones', + }, + }); + console.log(docs); + + // Continue with using retrieved docs + // in RAG prompts. + //... + } + ); + ``` +

+
+
+

Go

+

+ %include ../go/samples/pgvector/main.go use-retr +

+
+
+
\ No newline at end of file diff --git a/docs/templates/pgvector.md b/docs/templates/pgvector.md index be8cf3b0cb..62a6899ab2 100644 --- a/docs/templates/pgvector.md +++ b/docs/templates/pgvector.md @@ -1,74 +1,174 @@ + + # pgvector retriever template You can use PostgreSQL and `pgvector` as your retriever implementation. Use the -following example as a starting point and modify it to work with your database +following examples as a starting point and modify it to work with your database schema. -```js -import { embed } from '@genkit-ai/ai/embedder'; -import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever'; -import { defineFlow } from '@genkit-ai/flow'; -import { textEmbeddingGecko } from '@genkit-ai/vertexai'; -import { toSql } from 'pgvector'; -import postgres from 'postgres'; -import { z } from 'zod'; +For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the +Postgres client, but you may use another client libaray of your choice. + +
+ +
+

Node.js (Typescript)

+

```js + import { embed } from '@genkit-ai/ai/embedder'; + import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever'; + import { defineFlow } from '@genkit-ai/flow'; + import { textEmbeddingGecko } from '@genkit-ai/vertexai'; + import { toSql } from 'pgvector'; + import postgres from 'postgres'; + import { z } from 'zod'; + + const sql = postgres({ ssl: false, database: 'recaps' }); -const sql = postgres({ ssl: false, database: 'recaps' }); + const QueryOptions = z.object({ + show: z.string(), + k: z.number().optional(), + }); -const QueryOptions = z.object({ - show: z.string(), - k: z.number().optional(), -}); + const sqlRetriever = defineRetriever( + { + name: 'pgvector-myTable', + configSchema: QueryOptions, + }, + async (input, options) => { + const embedding = await embed({ + embedder: textEmbeddingGecko, + content: input, + }); + const results = await sql + SELECT episode_id, season_number, chunk as content + FROM embeddings + WHERE show_id = ${options.show} + ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3} + ; + return { + documents: results.map((row) => { + const { content, ...metadata } = row; + return Document.fromText(content, metadata); + }), + }; + } + ); + ``` +

+
+
+

Go

+

+ ```go + func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever { + f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { + eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}}) + if err != nil { + return nil, err + } + rows, err := db.QueryContext(ctx, ` + SELECT episode_id, season_number, chunk as content + FROM embeddings + WHERE show_id = $1 + ORDER BY embedding <#> $2 + LIMIT 2`, + req.Options, pgv.NewVector(eres.Embeddings[0].Embedding)) + if err != nil { + return nil, err + } + defer rows.Close() -const sqlRetriever = defineRetriever( - { - name: 'pgvector-myTable', - configSchema: QueryOptions, - }, - async (input, options) => { - const embedding = await embed({ - embedder: textEmbeddingGecko, - content: input, - }); - const results = await sql` - SELECT episode_id, season_number, chunk as content - FROM embeddings - WHERE show_id = ${options.show} - ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3} - `; - return { - documents: results.map((row) => { - const { content, ...metadata } = row; - return Document.fromText(content, metadata); - }), - }; - } -); -``` + res := &ai.RetrieverResponse{} + for rows.Next() { + var eid, sn int + var content string + if err := rows.Scan(&eid, &sn, &content); err != nil { + return nil, err + } + meta := map[string]any{ + "episode_id": eid, + "season_number": sn, + } + doc := &ai.Document{ + Content: []*ai.Part{ai.NewTextPart(content)}, + Metadata: meta, + } + res.Documents = append(res.Documents, doc) + } + if err := rows.Err(); err != nil { + return nil, err + } + return res, nil + } + return ai.DefineRetriever(provider, "shows", f) + } + ``` +

+
+ +
+
And here's how to use the retriever in a flow: -```js -// Simple flow to use the sqlRetriever -export const askQuestionsOnGoT = defineFlow( - { - name: 'askQuestionsOnGoT', - inputSchema: z.string(), - outputSchema: z.string(), - }, - async (inputQuestion) => { - const docs = await retrieve({ - retriever: sqlRetriever, - query: inputQuestion, - options: { - show: 'Game of Thrones', - }, - }); - console.log(docs); +
+ +
+

Node.js (Typescript)

+

+ ```js + // Simple flow to use the sqlRetriever + export const askQuestionsOnGoT = defineFlow( + { + name: 'askQuestionsOnGoT', + inputSchema: z.string(), + outputSchema: z.string(), + }, + async (inputQuestion) => { + const docs = await retrieve({ + retriever: sqlRetriever, + query: inputQuestion, + options: { + show: 'Game of Thrones', + }, + }); + console.log(docs); + + // Continue with using retrieved docs + // in RAG prompts. + //... + } + ); + ``` +

+
+
+

Go

+

+ ```go + retriever := defineRetriever(db, embedder) + + type input struct { + Question string + Show string + } - // Continue with using retrieved docs - // in RAG prompts. - //... - } -); -``` + genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) { + res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{ + Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}}, + Options: in.Show, + }) + if err != nil { + return "", err + } + for _, doc := range res.Documents { + fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text) + } + // Use documents in RAG prompts. + return "", nil + }) + ``` +

+
+
+
diff --git a/go/samples/pgvector/main.go b/go/samples/pgvector/main.go index de3f6bc692..4b940654c1 100644 --- a/go/samples/pgvector/main.go +++ b/go/samples/pgvector/main.go @@ -26,6 +26,7 @@ // curl -d '{"Show": "Best Friends", "Question": "Who does Alice love?"}' http://localhost:3400/askQuestion package main +// !+imports import ( "context" "database/sql" @@ -41,6 +42,8 @@ import ( pgv "github.com/pgvector/pgvector-go" ) +// !-imports + var ( connString = flag.String("dbconn", "", "database connection string") apiKey = flag.String("apikey", "", "Gemini API key") @@ -84,6 +87,7 @@ func run() error { } } + // !+use-retr retriever := defineRetriever(db, embedder) type input struct { @@ -105,12 +109,14 @@ func run() error { // Use documents in RAG prompts. return "", nil }) + // !-use-retr return genkit.Init(ctx, nil) } const provider = "pgvector" +// !+retr func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever { f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}}) @@ -154,6 +160,8 @@ func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever { return ai.DefineRetriever(provider, "shows", f) } +// !-retr + func defineIndexer(db *sql.DB, embedder *ai.Embedder) *ai.Indexer { // The indexer assumes that each Document has a single part, to be embedded, and metadata fields // for the table primary key: show_id, season_number, episode_id. From 0b5d649467b9a758bc2c318ef0bc5e402e1721ea Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Thu, 11 Jul 2024 14:56:07 +0000 Subject: [PATCH 3/5] Format --- docs/templates/pgvector | 1 + docs/templates/pgvector.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/templates/pgvector b/docs/templates/pgvector index 90a63965c1..fa5d52beb5 100644 --- a/docs/templates/pgvector +++ b/docs/templates/pgvector @@ -103,5 +103,6 @@ And here's how to use the retriever in a flow: %include ../go/samples/pgvector/main.go use-retr

+ \ No newline at end of file diff --git a/docs/templates/pgvector.md b/docs/templates/pgvector.md index 62a6899ab2..e2f1e19f28 100644 --- a/docs/templates/pgvector.md +++ b/docs/templates/pgvector.md @@ -170,5 +170,6 @@ And here's how to use the retriever in a flow: ```

+ From df17e794229fed51e52c5871ee9e7ce8db96d830 Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Thu, 11 Jul 2024 21:40:56 +0000 Subject: [PATCH 4/5] Separate go docs --- docs-go/Makefile | 2 +- docs-go/pgvector.md | 85 ++++++++++++++ docs-go/pgvector.src | 18 +++ docs/templates/pgvector | 108 ----------------- docs/templates/pgvector.md | 225 ++++++++++-------------------------- go/samples/pgvector/main.go | 3 - 6 files changed, 166 insertions(+), 275 deletions(-) create mode 100644 docs-go/pgvector.md create mode 100644 docs-go/pgvector.src delete mode 100644 docs/templates/pgvector diff --git a/docs-go/Makefile b/docs-go/Makefile index 2497cc9daf..1e3caecf23 100644 --- a/docs-go/Makefile +++ b/docs-go/Makefile @@ -1,6 +1,6 @@ WEAVE=$(HOME)/go/bin/weave -all: $(WEAVE) get-started-go.md flows.md models.md prompts.md dotprompt.md +all: $(WEAVE) get-started-go.md flows.md models.md prompts.md dotprompt.md pgvector.md $(WEAVE): ../go/internal/cmd/weave/*.go go -C ../go install ./internal/cmd/weave diff --git a/docs-go/pgvector.md b/docs-go/pgvector.md new file mode 100644 index 0000000000..f1faeee39a --- /dev/null +++ b/docs-go/pgvector.md @@ -0,0 +1,85 @@ + + +# pgvector retriever template + +You can use PostgreSQL and `pgvector` as your retriever implementation. Use the +following examples as a starting point and modify it to work with your database +schema. + +We use [database/sql](https://pkg.go.dev/database/sql) to connect to the +Postgres server, but you may use another client library of your choice. + +- {Go} + + ```go + func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever { + f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { + eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}}) + if err != nil { + return nil, err + } + rows, err := db.QueryContext(ctx, ` + SELECT episode_id, season_number, chunk as content + FROM embeddings + WHERE show_id = $1 + ORDER BY embedding <#> $2 + LIMIT 2`, + req.Options, pgv.NewVector(eres.Embeddings[0].Embedding)) + if err != nil { + return nil, err + } + defer rows.Close() + + res := &ai.RetrieverResponse{} + for rows.Next() { + var eid, sn int + var content string + if err := rows.Scan(&eid, &sn, &content); err != nil { + return nil, err + } + meta := map[string]any{ + "episode_id": eid, + "season_number": sn, + } + doc := &ai.Document{ + Content: []*ai.Part{ai.NewTextPart(content)}, + Metadata: meta, + } + res.Documents = append(res.Documents, doc) + } + if err := rows.Err(); err != nil { + return nil, err + } + return res, nil + } + return ai.DefineRetriever(provider, "shows", f) + } + ``` + +And here's how to use the retriever in a flow: + +- {Go} + + ```go + retriever := defineRetriever(db, embedder) + + type input struct { + Question string + Show string + } + + genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) { + res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{ + Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}}, + Options: in.Show, + }) + if err != nil { + return "", err + } + for _, doc := range res.Documents { + fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text) + } + // Use documents in RAG prompts. + return "", nil + }) + ``` diff --git a/docs-go/pgvector.src b/docs-go/pgvector.src new file mode 100644 index 0000000000..b1b81a605d --- /dev/null +++ b/docs-go/pgvector.src @@ -0,0 +1,18 @@ +# pgvector retriever template + +You can use PostgreSQL and `pgvector` as your retriever implementation. Use the +following examples as a starting point and modify it to work with your database +schema. + +We use [database/sql](https://pkg.go.dev/database/sql) to connect to the +Postgres server, but you may use another client library of your choice. + +- {Go} + + %include ../go/samples/pgvector/main.go retr + +And here's how to use the retriever in a flow: + +- {Go} + + %include ../go/samples/pgvector/main.go use-retr diff --git a/docs/templates/pgvector b/docs/templates/pgvector deleted file mode 100644 index fa5d52beb5..0000000000 --- a/docs/templates/pgvector +++ /dev/null @@ -1,108 +0,0 @@ -# pgvector retriever template - -You can use PostgreSQL and `pgvector` as your retriever implementation. Use the -following examples as a starting point and modify it to work with your database -schema. - -For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the -Postgres client, but you may use another client libaray of your choice. - -
- -
-

Node.js (Typescript)

-

```js - import { embed } from '@genkit-ai/ai/embedder'; - import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever'; - import { defineFlow } from '@genkit-ai/flow'; - import { textEmbeddingGecko } from '@genkit-ai/vertexai'; - import { toSql } from 'pgvector'; - import postgres from 'postgres'; - import { z } from 'zod'; - - const sql = postgres({ ssl: false, database: 'recaps' }); - - const QueryOptions = z.object({ - show: z.string(), - k: z.number().optional(), - }); - - const sqlRetriever = defineRetriever( - { - name: 'pgvector-myTable', - configSchema: QueryOptions, - }, - async (input, options) => { - const embedding = await embed({ - embedder: textEmbeddingGecko, - content: input, - }); - const results = await sql - SELECT episode_id, season_number, chunk as content - FROM embeddings - WHERE show_id = ${options.show} - ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3} - ; - return { - documents: results.map((row) => { - const { content, ...metadata } = row; - return Document.fromText(content, metadata); - }), - }; - } - ); - ``` -

-
-
-

Go

-

- %include ../go/samples/pgvector/main.go retr -

-
- -
-
- -And here's how to use the retriever in a flow: - -
- -
-

Node.js (Typescript)

-

- ```js - // Simple flow to use the sqlRetriever - export const askQuestionsOnGoT = defineFlow( - { - name: 'askQuestionsOnGoT', - inputSchema: z.string(), - outputSchema: z.string(), - }, - async (inputQuestion) => { - const docs = await retrieve({ - retriever: sqlRetriever, - query: inputQuestion, - options: { - show: 'Game of Thrones', - }, - }); - console.log(docs); - - // Continue with using retrieved docs - // in RAG prompts. - //... - } - ); - ``` -

-
-
-

Go

-

- %include ../go/samples/pgvector/main.go use-retr -

-
- -
-
\ No newline at end of file diff --git a/docs/templates/pgvector.md b/docs/templates/pgvector.md index e2f1e19f28..be8cf3b0cb 100644 --- a/docs/templates/pgvector.md +++ b/docs/templates/pgvector.md @@ -1,175 +1,74 @@ - - # pgvector retriever template You can use PostgreSQL and `pgvector` as your retriever implementation. Use the -following examples as a starting point and modify it to work with your database +following example as a starting point and modify it to work with your database schema. -For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the -Postgres client, but you may use another client libaray of your choice. - -
- -
-

Node.js (Typescript)

-

```js - import { embed } from '@genkit-ai/ai/embedder'; - import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever'; - import { defineFlow } from '@genkit-ai/flow'; - import { textEmbeddingGecko } from '@genkit-ai/vertexai'; - import { toSql } from 'pgvector'; - import postgres from 'postgres'; - import { z } from 'zod'; - - const sql = postgres({ ssl: false, database: 'recaps' }); - - const QueryOptions = z.object({ - show: z.string(), - k: z.number().optional(), - }); +```js +import { embed } from '@genkit-ai/ai/embedder'; +import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever'; +import { defineFlow } from '@genkit-ai/flow'; +import { textEmbeddingGecko } from '@genkit-ai/vertexai'; +import { toSql } from 'pgvector'; +import postgres from 'postgres'; +import { z } from 'zod'; - const sqlRetriever = defineRetriever( - { - name: 'pgvector-myTable', - configSchema: QueryOptions, - }, - async (input, options) => { - const embedding = await embed({ - embedder: textEmbeddingGecko, - content: input, - }); - const results = await sql - SELECT episode_id, season_number, chunk as content - FROM embeddings - WHERE show_id = ${options.show} - ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3} - ; - return { - documents: results.map((row) => { - const { content, ...metadata } = row; - return Document.fromText(content, metadata); - }), - }; - } - ); - ``` -

-
-
-

Go

-

- ```go - func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever { - f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) { - eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}}) - if err != nil { - return nil, err - } - rows, err := db.QueryContext(ctx, ` - SELECT episode_id, season_number, chunk as content - FROM embeddings - WHERE show_id = $1 - ORDER BY embedding <#> $2 - LIMIT 2`, - req.Options, pgv.NewVector(eres.Embeddings[0].Embedding)) - if err != nil { - return nil, err - } - defer rows.Close() +const sql = postgres({ ssl: false, database: 'recaps' }); - res := &ai.RetrieverResponse{} - for rows.Next() { - var eid, sn int - var content string - if err := rows.Scan(&eid, &sn, &content); err != nil { - return nil, err - } - meta := map[string]any{ - "episode_id": eid, - "season_number": sn, - } - doc := &ai.Document{ - Content: []*ai.Part{ai.NewTextPart(content)}, - Metadata: meta, - } - res.Documents = append(res.Documents, doc) - } - if err := rows.Err(); err != nil { - return nil, err - } - return res, nil - } - return ai.DefineRetriever(provider, "shows", f) - } - ``` -

-
+const QueryOptions = z.object({ + show: z.string(), + k: z.number().optional(), +}); -
-
+const sqlRetriever = defineRetriever( + { + name: 'pgvector-myTable', + configSchema: QueryOptions, + }, + async (input, options) => { + const embedding = await embed({ + embedder: textEmbeddingGecko, + content: input, + }); + const results = await sql` + SELECT episode_id, season_number, chunk as content + FROM embeddings + WHERE show_id = ${options.show} + ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3} + `; + return { + documents: results.map((row) => { + const { content, ...metadata } = row; + return Document.fromText(content, metadata); + }), + }; + } +); +``` And here's how to use the retriever in a flow: -
- -
-

Node.js (Typescript)

-

- ```js - // Simple flow to use the sqlRetriever - export const askQuestionsOnGoT = defineFlow( - { - name: 'askQuestionsOnGoT', - inputSchema: z.string(), - outputSchema: z.string(), - }, - async (inputQuestion) => { - const docs = await retrieve({ - retriever: sqlRetriever, - query: inputQuestion, - options: { - show: 'Game of Thrones', - }, - }); - console.log(docs); - - // Continue with using retrieved docs - // in RAG prompts. - //... - } - ); - ``` -

-
-
-

Go

-

- ```go - retriever := defineRetriever(db, embedder) - - type input struct { - Question string - Show string - } - - genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) { - res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{ - Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}}, - Options: in.Show, - }) - if err != nil { - return "", err - } - for _, doc := range res.Documents { - fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text) - } - // Use documents in RAG prompts. - return "", nil - }) - ``` -

-
+```js +// Simple flow to use the sqlRetriever +export const askQuestionsOnGoT = defineFlow( + { + name: 'askQuestionsOnGoT', + inputSchema: z.string(), + outputSchema: z.string(), + }, + async (inputQuestion) => { + const docs = await retrieve({ + retriever: sqlRetriever, + query: inputQuestion, + options: { + show: 'Game of Thrones', + }, + }); + console.log(docs); -
-
+ // Continue with using retrieved docs + // in RAG prompts. + //... + } +); +``` diff --git a/go/samples/pgvector/main.go b/go/samples/pgvector/main.go index 4b940654c1..023b70cc91 100644 --- a/go/samples/pgvector/main.go +++ b/go/samples/pgvector/main.go @@ -26,7 +26,6 @@ // curl -d '{"Show": "Best Friends", "Question": "Who does Alice love?"}' http://localhost:3400/askQuestion package main -// !+imports import ( "context" "database/sql" @@ -42,8 +41,6 @@ import ( pgv "github.com/pgvector/pgvector-go" ) -// !-imports - var ( connString = flag.String("dbconn", "", "database connection string") apiKey = flag.String("apikey", "", "Gemini API key") From 768e36ec8c7936e7cf6833ea80a11d927b14948a Mon Sep 17 00:00:00 2001 From: Samuel Bushi Date: Thu, 11 Jul 2024 21:49:11 +0000 Subject: [PATCH 5/5] format --- docs-go/pgvector.md | 3 +-- docs-go/pgvector.src | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs-go/pgvector.md b/docs-go/pgvector.md index f1faeee39a..14cfd1cc21 100644 --- a/docs-go/pgvector.md +++ b/docs-go/pgvector.md @@ -6,8 +6,7 @@ You can use PostgreSQL and `pgvector` as your retriever implementation. Use the following examples as a starting point and modify it to work with your database schema. -We use [database/sql](https://pkg.go.dev/database/sql) to connect to the -Postgres server, but you may use another client library of your choice. +We use [database/sql](https://pkg.go.dev/database/sql) to connect to the Postgres server, but you may use another client library of your choice. - {Go} diff --git a/docs-go/pgvector.src b/docs-go/pgvector.src index b1b81a605d..b9126ce946 100644 --- a/docs-go/pgvector.src +++ b/docs-go/pgvector.src @@ -4,8 +4,7 @@ You can use PostgreSQL and `pgvector` as your retriever implementation. Use the following examples as a starting point and modify it to work with your database schema. -We use [database/sql](https://pkg.go.dev/database/sql) to connect to the -Postgres server, but you may use another client library of your choice. +We use [database/sql](https://pkg.go.dev/database/sql) to connect to the Postgres server, but you may use another client library of your choice. - {Go}