From 81a144957cddc5ffaf0bed1f1e772c596168377e Mon Sep 17 00:00:00 2001
From: Jonathan Amsterdam
Date: Thu, 11 Jul 2024 09:14:10 -0400
Subject: [PATCH 1/5] [Go] pgvector sample
A sample demonstrating how to use Postgres's vector extension
to build an indexer and retriever.
Formerly #375.
---
go/go.mod | 2 +
go/go.sum | 4 +
go/samples/pgvector/main.go | 218 +++++++++++++++++++++++++++++++
go/samples/pgvector/pgvector.sql | 22 ++++
4 files changed, 246 insertions(+)
create mode 100644 go/samples/pgvector/main.go
create mode 100644 go/samples/pgvector/pgvector.sql
diff --git a/go/go.mod b/go/go.mod
index 60f1a33dcb..5ac1aae651 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -49,7 +49,9 @@ require (
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.4 // indirect
github.com/kr/text v0.2.0 // indirect
+ github.com/lib/pq v1.10.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
+ github.com/pgvector/pgvector-go v0.2.0 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
go.opencensus.io v0.24.0 // indirect
diff --git a/go/go.sum b/go/go.sum
index ea68f9f6e6..fb742fe227 100644
--- a/go/go.sum
+++ b/go/go.sum
@@ -100,8 +100,12 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/pgvector/pgvector-go v0.2.0 h1:NZdW4NxUxdSCzaev3LVHb9ORf+LdX+uZOQVqQ6s2Zyg=
+github.com/pgvector/pgvector-go v0.2.0/go.mod h1:OQpvU5QZGQOPI9quIXAyHaRZ5yGk/RGUDbs9C3DPUNE=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
diff --git a/go/samples/pgvector/main.go b/go/samples/pgvector/main.go
new file mode 100644
index 0000000000..de3f6bc692
--- /dev/null
+++ b/go/samples/pgvector/main.go
@@ -0,0 +1,218 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This program shows how to use Postgres's pgvector extension with Genkit.
+
+// This program can be manually tested like so:
+//
+// In development mode (with the environment variable GENKIT_ENV="dev"):
+// Start the server (dev API on port 3100; flows, as queried below, on port 3400):
+//
+// go run . -dbconn "$DBCONN" -apikey $API_KEY &
+//
+// Ask a question:
+//
+// curl -d '{"Show": "Best Friends", "Question": "Who does Alice love?"}' http://localhost:3400/askQuestion
+package main
+
+import (
+ "context"
+ "database/sql"
+ "errors"
+ "flag"
+ "fmt"
+ "log"
+
+ "github.com/firebase/genkit/go/ai"
+ "github.com/firebase/genkit/go/genkit"
+ "github.com/firebase/genkit/go/plugins/googleai"
+ _ "github.com/lib/pq"
+ pgv "github.com/pgvector/pgvector-go"
+)
+
+var (
+ connString = flag.String("dbconn", "", "database connection string")
+ apiKey = flag.String("apikey", "", "Gemini API key")
+ index = flag.Bool("index", false, "index the existing data")
+)
+
+func main() {
+ flag.Parse()
+ if err := run(); err != nil {
+ log.Fatal(err)
+ }
+}
+
+func run() error {
+ if *connString == "" {
+ return errors.New("need -dbconn")
+ }
+ if *apiKey == "" {
+ return errors.New("need -apikey")
+ }
+ ctx := context.Background()
+ if err := googleai.Init(ctx, &googleai.Config{APIKey: *apiKey}); err != nil {
+ return err
+ }
+ const embedderName = "embedding-001"
+ embedder := googleai.Embedder(embedderName)
+ if embedder == nil {
+ return fmt.Errorf("embedder %s is not known to the googleai plugin", embedderName)
+ }
+
+ db, err := sql.Open("postgres", *connString)
+ if err != nil {
+ return err
+ }
+ defer db.Close()
+
+ if *index {
+ indexer := defineIndexer(db, embedder)
+ if err := indexExistingRows(ctx, db, indexer); err != nil {
+ return err
+ }
+ }
+
+ retriever := defineRetriever(db, embedder)
+
+ type input struct {
+ Question string
+ Show string
+ }
+
+ genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) {
+ res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{
+ Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}},
+ Options: in.Show,
+ })
+ if err != nil {
+ return "", err
+ }
+ for _, doc := range res.Documents {
+ fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text)
+ }
+ // Use documents in RAG prompts.
+ return "", nil
+ })
+
+ return genkit.Init(ctx, nil)
+}
+
+const provider = "pgvector"
+
+func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever {
+ f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
+ eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}})
+ if err != nil {
+ return nil, err
+ }
+ rows, err := db.QueryContext(ctx, `
+ SELECT episode_id, season_number, chunk as content
+ FROM embeddings
+ WHERE show_id = $1
+ ORDER BY embedding <#> $2
+ LIMIT 2`,
+ req.Options, pgv.NewVector(eres.Embeddings[0].Embedding))
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ res := &ai.RetrieverResponse{}
+ for rows.Next() {
+ var eid, sn int
+ var content string
+ if err := rows.Scan(&eid, &sn, &content); err != nil {
+ return nil, err
+ }
+ meta := map[string]any{
+ "episode_id": eid,
+ "season_number": sn,
+ }
+ doc := &ai.Document{
+ Content: []*ai.Part{ai.NewTextPart(content)},
+ Metadata: meta,
+ }
+ res.Documents = append(res.Documents, doc)
+ }
+ if err := rows.Err(); err != nil {
+ return nil, err
+ }
+ return res, nil
+ }
+ return ai.DefineRetriever(provider, "shows", f)
+}
+
+func defineIndexer(db *sql.DB, embedder *ai.Embedder) *ai.Indexer {
+ // The indexer assumes that each Document has a single part, to be embedded, and metadata fields
+ // for the table primary key: show_id, season_number, episode_id.
+ const query = `
+ UPDATE embeddings
+ SET embedding = $4
+ WHERE show_id = $1 AND season_number = $2 AND episode_id = $3
+ `
+ return ai.DefineIndexer(provider, "shows", func(ctx context.Context, req *ai.IndexerRequest) error {
+ res, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: req.Documents})
+ if err != nil {
+ return err
+ }
+ // You may want to use your database's batch functionality to insert the embeddings
+ // more efficiently.
+ for i, emb := range res.Embeddings {
+ doc := req.Documents[i]
+ args := make([]any, 4)
+ for j, k := range []string{"show_id", "season_number", "episode_id"} {
+ if a, ok := doc.Metadata[k]; ok {
+ args[j] = a
+ } else {
+ return fmt.Errorf("doc[%d]: missing metadata key %q", i, k)
+ }
+ }
+ args[3] = pgv.NewVector(emb.Embedding)
+ if _, err := db.ExecContext(ctx, query, args...); err != nil {
+ return err
+ }
+ }
+ return nil
+ })
+}
+
+func indexExistingRows(ctx context.Context, db *sql.DB, indexer *ai.Indexer) error {
+ rows, err := db.QueryContext(ctx, `SELECT show_id, season_number, episode_id, chunk FROM embeddings`)
+ if err != nil {
+ return err
+ }
+ defer rows.Close()
+
+ req := &ai.IndexerRequest{}
+ for rows.Next() {
+ var sid, chunk string
+ var sn, eid int
+ if err := rows.Scan(&sid, &sn, &eid, &chunk); err != nil {
+ return err
+ }
+ req.Documents = append(req.Documents, &ai.Document{
+ Content: []*ai.Part{ai.NewTextPart(chunk)},
+ Metadata: map[string]any{
+ "show_id": sid,
+ "season_number": sn,
+ "episode_id": eid,
+ },
+ })
+ }
+ if err := rows.Err(); err != nil {
+ return err
+ }
+ return indexer.Index(ctx, req)
+}
diff --git a/go/samples/pgvector/pgvector.sql b/go/samples/pgvector/pgvector.sql
new file mode 100644
index 0000000000..a252e9e3d5
--- /dev/null
+++ b/go/samples/pgvector/pgvector.sql
@@ -0,0 +1,22 @@
+-- This SQL enables the vector extension and creates the table and data used
+-- in the accompanying sample.
+
+CREATE EXTENSION IF NOT EXISTS vector;
+
+CREATE TABLE embeddings (
+ show_id TEXT NOT NULL,
+ season_number INTEGER NOT NULL,
+ episode_id INTEGER NOT NULL,
+ chunk TEXT,
+ embedding vector(768),
+ PRIMARY KEY (show_id, season_number, episode_id)
+);
+
+INSERT INTO embeddings (show_id, season_number, episode_id, chunk) VALUES
+ ('La Vie', 1, 1, 'Natasha confesses her love for Pierre.'),
+ ('La Vie', 1, 2, 'Pierre and Natasha become engaged.'),
+ ('La Vie', 1, 3, 'Margot and Henri divorce.'),
+ ('Best Friends', 1, 1, 'Alice confesses her love for Oscar.'),
+ ('Best Friends', 1, 2, 'Oscar and Alice become engaged.'),
+ ('Best Friends', 1, 3, 'Bob and Pat divorce.')
+;
From 2e3007ff2146f8b39d863cf05867b6758e2aee32 Mon Sep 17 00:00:00 2001
From: Samuel Bushi
Date: Thu, 11 Jul 2024 14:48:46 +0000
Subject: [PATCH 2/5] Update pgvector docs to include Go
---
docs/templates/pgvector | 107 +++++++++++++++++
docs/templates/pgvector.md | 224 ++++++++++++++++++++++++++----------
go/samples/pgvector/main.go | 8 ++
3 files changed, 277 insertions(+), 62 deletions(-)
create mode 100644 docs/templates/pgvector
diff --git a/docs/templates/pgvector b/docs/templates/pgvector
new file mode 100644
index 0000000000..90a63965c1
--- /dev/null
+++ b/docs/templates/pgvector
@@ -0,0 +1,107 @@
+# pgvector retriever template
+
+You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
+following examples as a starting point and modify it to work with your database
+schema.
+
+For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the
+Postgres client, but you may use another client libaray of your choice.
+
+
+
+
+ Node.js (Typescript)
+ ```js
+ import { embed } from '@genkit-ai/ai/embedder';
+ import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever';
+ import { defineFlow } from '@genkit-ai/flow';
+ import { textEmbeddingGecko } from '@genkit-ai/vertexai';
+ import { toSql } from 'pgvector';
+ import postgres from 'postgres';
+ import { z } from 'zod';
+
+ const sql = postgres({ ssl: false, database: 'recaps' });
+
+ const QueryOptions = z.object({
+ show: z.string(),
+ k: z.number().optional(),
+ });
+
+ const sqlRetriever = defineRetriever(
+ {
+ name: 'pgvector-myTable',
+ configSchema: QueryOptions,
+ },
+ async (input, options) => {
+ const embedding = await embed({
+ embedder: textEmbeddingGecko,
+ content: input,
+ });
+ const results = await sql
+ SELECT episode_id, season_number, chunk as content
+ FROM embeddings
+ WHERE show_id = ${options.show}
+ ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3}
+ ;
+ return {
+ documents: results.map((row) => {
+ const { content, ...metadata } = row;
+ return Document.fromText(content, metadata);
+ }),
+ };
+ }
+ );
+ ```
+
+
+
+ Go
+
+ %include ../go/samples/pgvector/main.go retr
+
+
+
+
+
+
+And here's how to use the retriever in a flow:
+
+
+
+
+ Node.js (Typescript)
+
+ ```js
+ // Simple flow to use the sqlRetriever
+ export const askQuestionsOnGoT = defineFlow(
+ {
+ name: 'askQuestionsOnGoT',
+ inputSchema: z.string(),
+ outputSchema: z.string(),
+ },
+ async (inputQuestion) => {
+ const docs = await retrieve({
+ retriever: sqlRetriever,
+ query: inputQuestion,
+ options: {
+ show: 'Game of Thrones',
+ },
+ });
+ console.log(docs);
+
+ // Continue with using retrieved docs
+ // in RAG prompts.
+ //...
+ }
+ );
+ ```
+
+
+
+ Go
+
+ %include ../go/samples/pgvector/main.go use-retr
+
+
+
+
\ No newline at end of file
diff --git a/docs/templates/pgvector.md b/docs/templates/pgvector.md
index be8cf3b0cb..62a6899ab2 100644
--- a/docs/templates/pgvector.md
+++ b/docs/templates/pgvector.md
@@ -1,74 +1,174 @@
+
+
# pgvector retriever template
You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
-following example as a starting point and modify it to work with your database
+following examples as a starting point and modify it to work with your database
schema.
-```js
-import { embed } from '@genkit-ai/ai/embedder';
-import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever';
-import { defineFlow } from '@genkit-ai/flow';
-import { textEmbeddingGecko } from '@genkit-ai/vertexai';
-import { toSql } from 'pgvector';
-import postgres from 'postgres';
-import { z } from 'zod';
+For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the
+Postgres client, but you may use another client libaray of your choice.
+
+
+
+
+ Node.js (Typescript)
+ ```js
+ import { embed } from '@genkit-ai/ai/embedder';
+ import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever';
+ import { defineFlow } from '@genkit-ai/flow';
+ import { textEmbeddingGecko } from '@genkit-ai/vertexai';
+ import { toSql } from 'pgvector';
+ import postgres from 'postgres';
+ import { z } from 'zod';
+
+ const sql = postgres({ ssl: false, database: 'recaps' });
-const sql = postgres({ ssl: false, database: 'recaps' });
+ const QueryOptions = z.object({
+ show: z.string(),
+ k: z.number().optional(),
+ });
-const QueryOptions = z.object({
- show: z.string(),
- k: z.number().optional(),
-});
+ const sqlRetriever = defineRetriever(
+ {
+ name: 'pgvector-myTable',
+ configSchema: QueryOptions,
+ },
+ async (input, options) => {
+ const embedding = await embed({
+ embedder: textEmbeddingGecko,
+ content: input,
+ });
+ const results = await sql
+ SELECT episode_id, season_number, chunk as content
+ FROM embeddings
+ WHERE show_id = ${options.show}
+ ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3}
+ ;
+ return {
+ documents: results.map((row) => {
+ const { content, ...metadata } = row;
+ return Document.fromText(content, metadata);
+ }),
+ };
+ }
+ );
+ ```
+
+
+
+ Go
+
+ ```go
+ func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever {
+ f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
+ eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}})
+ if err != nil {
+ return nil, err
+ }
+ rows, err := db.QueryContext(ctx, `
+ SELECT episode_id, season_number, chunk as content
+ FROM embeddings
+ WHERE show_id = $1
+ ORDER BY embedding <#> $2
+ LIMIT 2`,
+ req.Options, pgv.NewVector(eres.Embeddings[0].Embedding))
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
-const sqlRetriever = defineRetriever(
- {
- name: 'pgvector-myTable',
- configSchema: QueryOptions,
- },
- async (input, options) => {
- const embedding = await embed({
- embedder: textEmbeddingGecko,
- content: input,
- });
- const results = await sql`
- SELECT episode_id, season_number, chunk as content
- FROM embeddings
- WHERE show_id = ${options.show}
- ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3}
- `;
- return {
- documents: results.map((row) => {
- const { content, ...metadata } = row;
- return Document.fromText(content, metadata);
- }),
- };
- }
-);
-```
+ res := &ai.RetrieverResponse{}
+ for rows.Next() {
+ var eid, sn int
+ var content string
+ if err := rows.Scan(&eid, &sn, &content); err != nil {
+ return nil, err
+ }
+ meta := map[string]any{
+ "episode_id": eid,
+ "season_number": sn,
+ }
+ doc := &ai.Document{
+ Content: []*ai.Part{ai.NewTextPart(content)},
+ Metadata: meta,
+ }
+ res.Documents = append(res.Documents, doc)
+ }
+ if err := rows.Err(); err != nil {
+ return nil, err
+ }
+ return res, nil
+ }
+ return ai.DefineRetriever(provider, "shows", f)
+ }
+ ```
+
+
+
+
+
And here's how to use the retriever in a flow:
-```js
-// Simple flow to use the sqlRetriever
-export const askQuestionsOnGoT = defineFlow(
- {
- name: 'askQuestionsOnGoT',
- inputSchema: z.string(),
- outputSchema: z.string(),
- },
- async (inputQuestion) => {
- const docs = await retrieve({
- retriever: sqlRetriever,
- query: inputQuestion,
- options: {
- show: 'Game of Thrones',
- },
- });
- console.log(docs);
+
+
+
+ Node.js (Typescript)
+
+ ```js
+ // Simple flow to use the sqlRetriever
+ export const askQuestionsOnGoT = defineFlow(
+ {
+ name: 'askQuestionsOnGoT',
+ inputSchema: z.string(),
+ outputSchema: z.string(),
+ },
+ async (inputQuestion) => {
+ const docs = await retrieve({
+ retriever: sqlRetriever,
+ query: inputQuestion,
+ options: {
+ show: 'Game of Thrones',
+ },
+ });
+ console.log(docs);
+
+ // Continue with using retrieved docs
+ // in RAG prompts.
+ //...
+ }
+ );
+ ```
+
+
+
+ Go
+
+ ```go
+ retriever := defineRetriever(db, embedder)
+
+ type input struct {
+ Question string
+ Show string
+ }
- // Continue with using retrieved docs
- // in RAG prompts.
- //...
- }
-);
-```
+ genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) {
+ res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{
+ Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}},
+ Options: in.Show,
+ })
+ if err != nil {
+ return "", err
+ }
+ for _, doc := range res.Documents {
+ fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text)
+ }
+ // Use documents in RAG prompts.
+ return "", nil
+ })
+ ```
+
+
+
+
diff --git a/go/samples/pgvector/main.go b/go/samples/pgvector/main.go
index de3f6bc692..4b940654c1 100644
--- a/go/samples/pgvector/main.go
+++ b/go/samples/pgvector/main.go
@@ -26,6 +26,7 @@
// curl -d '{"Show": "Best Friends", "Question": "Who does Alice love?"}' http://localhost:3400/askQuestion
package main
+// !+imports
import (
"context"
"database/sql"
@@ -41,6 +42,8 @@ import (
pgv "github.com/pgvector/pgvector-go"
)
+// !-imports
+
var (
connString = flag.String("dbconn", "", "database connection string")
apiKey = flag.String("apikey", "", "Gemini API key")
@@ -84,6 +87,7 @@ func run() error {
}
}
+ // !+use-retr
retriever := defineRetriever(db, embedder)
type input struct {
@@ -105,12 +109,14 @@ func run() error {
// Use documents in RAG prompts.
return "", nil
})
+ // !-use-retr
return genkit.Init(ctx, nil)
}
const provider = "pgvector"
+// !+retr
func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever {
f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}})
@@ -154,6 +160,8 @@ func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever {
return ai.DefineRetriever(provider, "shows", f)
}
+// !-retr
+
func defineIndexer(db *sql.DB, embedder *ai.Embedder) *ai.Indexer {
// The indexer assumes that each Document has a single part, to be embedded, and metadata fields
// for the table primary key: show_id, season_number, episode_id.
From 0b5d649467b9a758bc2c318ef0bc5e402e1721ea Mon Sep 17 00:00:00 2001
From: Samuel Bushi
Date: Thu, 11 Jul 2024 14:56:07 +0000
Subject: [PATCH 3/5] Format
---
docs/templates/pgvector | 1 +
docs/templates/pgvector.md | 1 +
2 files changed, 2 insertions(+)
diff --git a/docs/templates/pgvector b/docs/templates/pgvector
index 90a63965c1..fa5d52beb5 100644
--- a/docs/templates/pgvector
+++ b/docs/templates/pgvector
@@ -103,5 +103,6 @@ And here's how to use the retriever in a flow:
%include ../go/samples/pgvector/main.go use-retr
+
\ No newline at end of file
diff --git a/docs/templates/pgvector.md b/docs/templates/pgvector.md
index 62a6899ab2..e2f1e19f28 100644
--- a/docs/templates/pgvector.md
+++ b/docs/templates/pgvector.md
@@ -170,5 +170,6 @@ And here's how to use the retriever in a flow:
```
+
From df17e794229fed51e52c5871ee9e7ce8db96d830 Mon Sep 17 00:00:00 2001
From: Samuel Bushi
Date: Thu, 11 Jul 2024 21:40:56 +0000
Subject: [PATCH 4/5] Separate go docs
---
docs-go/Makefile | 2 +-
docs-go/pgvector.md | 85 ++++++++++++++
docs-go/pgvector.src | 18 +++
docs/templates/pgvector | 108 -----------------
docs/templates/pgvector.md | 225 ++++++++++--------------------------
go/samples/pgvector/main.go | 3 -
6 files changed, 166 insertions(+), 275 deletions(-)
create mode 100644 docs-go/pgvector.md
create mode 100644 docs-go/pgvector.src
delete mode 100644 docs/templates/pgvector
diff --git a/docs-go/Makefile b/docs-go/Makefile
index 2497cc9daf..1e3caecf23 100644
--- a/docs-go/Makefile
+++ b/docs-go/Makefile
@@ -1,6 +1,6 @@
WEAVE=$(HOME)/go/bin/weave
-all: $(WEAVE) get-started-go.md flows.md models.md prompts.md dotprompt.md
+all: $(WEAVE) get-started-go.md flows.md models.md prompts.md dotprompt.md pgvector.md
$(WEAVE): ../go/internal/cmd/weave/*.go
go -C ../go install ./internal/cmd/weave
diff --git a/docs-go/pgvector.md b/docs-go/pgvector.md
new file mode 100644
index 0000000000..f1faeee39a
--- /dev/null
+++ b/docs-go/pgvector.md
@@ -0,0 +1,85 @@
+
+
+# pgvector retriever template
+
+You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
+following examples as a starting point and modify it to work with your database
+schema.
+
+We use [database/sql](https://pkg.go.dev/database/sql) to connect to the
+Postgres server, but you may use another client library of your choice.
+
+- {Go}
+
+ ```go
+ func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever {
+ f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
+ eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}})
+ if err != nil {
+ return nil, err
+ }
+ rows, err := db.QueryContext(ctx, `
+ SELECT episode_id, season_number, chunk as content
+ FROM embeddings
+ WHERE show_id = $1
+ ORDER BY embedding <#> $2
+ LIMIT 2`,
+ req.Options, pgv.NewVector(eres.Embeddings[0].Embedding))
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ res := &ai.RetrieverResponse{}
+ for rows.Next() {
+ var eid, sn int
+ var content string
+ if err := rows.Scan(&eid, &sn, &content); err != nil {
+ return nil, err
+ }
+ meta := map[string]any{
+ "episode_id": eid,
+ "season_number": sn,
+ }
+ doc := &ai.Document{
+ Content: []*ai.Part{ai.NewTextPart(content)},
+ Metadata: meta,
+ }
+ res.Documents = append(res.Documents, doc)
+ }
+ if err := rows.Err(); err != nil {
+ return nil, err
+ }
+ return res, nil
+ }
+ return ai.DefineRetriever(provider, "shows", f)
+ }
+ ```
+
+And here's how to use the retriever in a flow:
+
+- {Go}
+
+ ```go
+ retriever := defineRetriever(db, embedder)
+
+ type input struct {
+ Question string
+ Show string
+ }
+
+ genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) {
+ res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{
+ Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}},
+ Options: in.Show,
+ })
+ if err != nil {
+ return "", err
+ }
+ for _, doc := range res.Documents {
+ fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text)
+ }
+ // Use documents in RAG prompts.
+ return "", nil
+ })
+ ```
diff --git a/docs-go/pgvector.src b/docs-go/pgvector.src
new file mode 100644
index 0000000000..b1b81a605d
--- /dev/null
+++ b/docs-go/pgvector.src
@@ -0,0 +1,18 @@
+# pgvector retriever template
+
+You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
+following examples as a starting point and modify it to work with your database
+schema.
+
+We use [database/sql](https://pkg.go.dev/database/sql) to connect to the
+Postgres server, but you may use another client library of your choice.
+
+- {Go}
+
+ %include ../go/samples/pgvector/main.go retr
+
+And here's how to use the retriever in a flow:
+
+- {Go}
+
+ %include ../go/samples/pgvector/main.go use-retr
diff --git a/docs/templates/pgvector b/docs/templates/pgvector
deleted file mode 100644
index fa5d52beb5..0000000000
--- a/docs/templates/pgvector
+++ /dev/null
@@ -1,108 +0,0 @@
-# pgvector retriever template
-
-You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
-following examples as a starting point and modify it to work with your database
-schema.
-
-For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the
-Postgres client, but you may use another client libaray of your choice.
-
-
-
-
- Node.js (Typescript)
- ```js
- import { embed } from '@genkit-ai/ai/embedder';
- import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever';
- import { defineFlow } from '@genkit-ai/flow';
- import { textEmbeddingGecko } from '@genkit-ai/vertexai';
- import { toSql } from 'pgvector';
- import postgres from 'postgres';
- import { z } from 'zod';
-
- const sql = postgres({ ssl: false, database: 'recaps' });
-
- const QueryOptions = z.object({
- show: z.string(),
- k: z.number().optional(),
- });
-
- const sqlRetriever = defineRetriever(
- {
- name: 'pgvector-myTable',
- configSchema: QueryOptions,
- },
- async (input, options) => {
- const embedding = await embed({
- embedder: textEmbeddingGecko,
- content: input,
- });
- const results = await sql
- SELECT episode_id, season_number, chunk as content
- FROM embeddings
- WHERE show_id = ${options.show}
- ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3}
- ;
- return {
- documents: results.map((row) => {
- const { content, ...metadata } = row;
- return Document.fromText(content, metadata);
- }),
- };
- }
- );
- ```
-
-
-
- Go
-
- %include ../go/samples/pgvector/main.go retr
-
-
-
-
-
-
-And here's how to use the retriever in a flow:
-
-
-
-
- Node.js (Typescript)
-
- ```js
- // Simple flow to use the sqlRetriever
- export const askQuestionsOnGoT = defineFlow(
- {
- name: 'askQuestionsOnGoT',
- inputSchema: z.string(),
- outputSchema: z.string(),
- },
- async (inputQuestion) => {
- const docs = await retrieve({
- retriever: sqlRetriever,
- query: inputQuestion,
- options: {
- show: 'Game of Thrones',
- },
- });
- console.log(docs);
-
- // Continue with using retrieved docs
- // in RAG prompts.
- //...
- }
- );
- ```
-
-
-
- Go
-
- %include ../go/samples/pgvector/main.go use-retr
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/templates/pgvector.md b/docs/templates/pgvector.md
index e2f1e19f28..be8cf3b0cb 100644
--- a/docs/templates/pgvector.md
+++ b/docs/templates/pgvector.md
@@ -1,175 +1,74 @@
-
-
# pgvector retriever template
You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
-following examples as a starting point and modify it to work with your database
+following example as a starting point and modify it to work with your database
schema.
-For the Golang snippet, we use [pgx](https://github.com/jackc/pgx) as the
-Postgres client, but you may use another client libaray of your choice.
-
-
-
-
- Node.js (Typescript)
- ```js
- import { embed } from '@genkit-ai/ai/embedder';
- import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever';
- import { defineFlow } from '@genkit-ai/flow';
- import { textEmbeddingGecko } from '@genkit-ai/vertexai';
- import { toSql } from 'pgvector';
- import postgres from 'postgres';
- import { z } from 'zod';
-
- const sql = postgres({ ssl: false, database: 'recaps' });
-
- const QueryOptions = z.object({
- show: z.string(),
- k: z.number().optional(),
- });
+```js
+import { embed } from '@genkit-ai/ai/embedder';
+import { Document, defineRetriever, retrieve } from '@genkit-ai/ai/retriever';
+import { defineFlow } from '@genkit-ai/flow';
+import { textEmbeddingGecko } from '@genkit-ai/vertexai';
+import { toSql } from 'pgvector';
+import postgres from 'postgres';
+import { z } from 'zod';
- const sqlRetriever = defineRetriever(
- {
- name: 'pgvector-myTable',
- configSchema: QueryOptions,
- },
- async (input, options) => {
- const embedding = await embed({
- embedder: textEmbeddingGecko,
- content: input,
- });
- const results = await sql
- SELECT episode_id, season_number, chunk as content
- FROM embeddings
- WHERE show_id = ${options.show}
- ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3}
- ;
- return {
- documents: results.map((row) => {
- const { content, ...metadata } = row;
- return Document.fromText(content, metadata);
- }),
- };
- }
- );
- ```
-
-
-
- Go
-
- ```go
- func defineRetriever(db *sql.DB, embedder *ai.Embedder) *ai.Retriever {
- f := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
- eres, err := embedder.Embed(ctx, &ai.EmbedRequest{Documents: []*ai.Document{req.Document}})
- if err != nil {
- return nil, err
- }
- rows, err := db.QueryContext(ctx, `
- SELECT episode_id, season_number, chunk as content
- FROM embeddings
- WHERE show_id = $1
- ORDER BY embedding <#> $2
- LIMIT 2`,
- req.Options, pgv.NewVector(eres.Embeddings[0].Embedding))
- if err != nil {
- return nil, err
- }
- defer rows.Close()
+const sql = postgres({ ssl: false, database: 'recaps' });
- res := &ai.RetrieverResponse{}
- for rows.Next() {
- var eid, sn int
- var content string
- if err := rows.Scan(&eid, &sn, &content); err != nil {
- return nil, err
- }
- meta := map[string]any{
- "episode_id": eid,
- "season_number": sn,
- }
- doc := &ai.Document{
- Content: []*ai.Part{ai.NewTextPart(content)},
- Metadata: meta,
- }
- res.Documents = append(res.Documents, doc)
- }
- if err := rows.Err(); err != nil {
- return nil, err
- }
- return res, nil
- }
- return ai.DefineRetriever(provider, "shows", f)
- }
- ```
-
-
+const QueryOptions = z.object({
+ show: z.string(),
+ k: z.number().optional(),
+});
-
-
+const sqlRetriever = defineRetriever(
+ {
+ name: 'pgvector-myTable',
+ configSchema: QueryOptions,
+ },
+ async (input, options) => {
+ const embedding = await embed({
+ embedder: textEmbeddingGecko,
+ content: input,
+ });
+ const results = await sql`
+ SELECT episode_id, season_number, chunk as content
+ FROM embeddings
+ WHERE show_id = ${options.show}
+ ORDER BY embedding <#> ${toSql(embedding)} LIMIT ${options.k ?? 3}
+ `;
+ return {
+ documents: results.map((row) => {
+ const { content, ...metadata } = row;
+ return Document.fromText(content, metadata);
+ }),
+ };
+ }
+);
+```
And here's how to use the retriever in a flow:
-
-
-
- Node.js (Typescript)
-
- ```js
- // Simple flow to use the sqlRetriever
- export const askQuestionsOnGoT = defineFlow(
- {
- name: 'askQuestionsOnGoT',
- inputSchema: z.string(),
- outputSchema: z.string(),
- },
- async (inputQuestion) => {
- const docs = await retrieve({
- retriever: sqlRetriever,
- query: inputQuestion,
- options: {
- show: 'Game of Thrones',
- },
- });
- console.log(docs);
-
- // Continue with using retrieved docs
- // in RAG prompts.
- //...
- }
- );
- ```
-
-
-
- Go
-
- ```go
- retriever := defineRetriever(db, embedder)
-
- type input struct {
- Question string
- Show string
- }
-
- genkit.DefineFlow("askQuestion", func(ctx context.Context, in input) (string, error) {
- res, err := retriever.Retrieve(ctx, &ai.RetrieverRequest{
- Document: &ai.Document{Content: []*ai.Part{ai.NewTextPart(in.Question)}},
- Options: in.Show,
- })
- if err != nil {
- return "", err
- }
- for _, doc := range res.Documents {
- fmt.Printf("%+v %q\n", doc.Metadata, doc.Content[0].Text)
- }
- // Use documents in RAG prompts.
- return "", nil
- })
- ```
-
-
+```js
+// Simple flow to use the sqlRetriever
+export const askQuestionsOnGoT = defineFlow(
+ {
+ name: 'askQuestionsOnGoT',
+ inputSchema: z.string(),
+ outputSchema: z.string(),
+ },
+ async (inputQuestion) => {
+ const docs = await retrieve({
+ retriever: sqlRetriever,
+ query: inputQuestion,
+ options: {
+ show: 'Game of Thrones',
+ },
+ });
+ console.log(docs);
-
-
+ // Continue with using retrieved docs
+ // in RAG prompts.
+ //...
+ }
+);
+```
diff --git a/go/samples/pgvector/main.go b/go/samples/pgvector/main.go
index 4b940654c1..023b70cc91 100644
--- a/go/samples/pgvector/main.go
+++ b/go/samples/pgvector/main.go
@@ -26,7 +26,6 @@
// curl -d '{"Show": "Best Friends", "Question": "Who does Alice love?"}' http://localhost:3400/askQuestion
package main
-// !+imports
import (
"context"
"database/sql"
@@ -42,8 +41,6 @@ import (
pgv "github.com/pgvector/pgvector-go"
)
-// !-imports
-
var (
connString = flag.String("dbconn", "", "database connection string")
apiKey = flag.String("apikey", "", "Gemini API key")
From 768e36ec8c7936e7cf6833ea80a11d927b14948a Mon Sep 17 00:00:00 2001
From: Samuel Bushi
Date: Thu, 11 Jul 2024 21:49:11 +0000
Subject: [PATCH 5/5] format
---
docs-go/pgvector.md | 3 +--
docs-go/pgvector.src | 3 +--
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/docs-go/pgvector.md b/docs-go/pgvector.md
index f1faeee39a..14cfd1cc21 100644
--- a/docs-go/pgvector.md
+++ b/docs-go/pgvector.md
@@ -6,8 +6,7 @@ You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
following examples as a starting point and modify it to work with your database
schema.
-We use [database/sql](https://pkg.go.dev/database/sql) to connect to the
-Postgres server, but you may use another client library of your choice.
+We use [database/sql](https://pkg.go.dev/database/sql) to connect to the Postgres server, but you may use another client library of your choice.
- {Go}
diff --git a/docs-go/pgvector.src b/docs-go/pgvector.src
index b1b81a605d..b9126ce946 100644
--- a/docs-go/pgvector.src
+++ b/docs-go/pgvector.src
@@ -4,8 +4,7 @@ You can use PostgreSQL and `pgvector` as your retriever implementation. Use the
following examples as a starting point and modify it to work with your database
schema.
-We use [database/sql](https://pkg.go.dev/database/sql) to connect to the
-Postgres server, but you may use another client library of your choice.
+We use [database/sql](https://pkg.go.dev/database/sql) to connect to the Postgres server, but you may use another client library of your choice.
- {Go}