Skip to content

Commit

Permalink
chore Implement index engines (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
henomis committed Oct 14, 2023
1 parent aa668f9 commit e463f6a
Show file tree
Hide file tree
Showing 15 changed files with 924 additions and 1,160 deletions.
Expand Up @@ -7,7 +7,7 @@ import (
openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -18,20 +18,22 @@ import (

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index := index.New(
jsondb.New("db.json"),
openaiembedder.New(openaiembedder.AdaEmbeddingV2),
).WithIncludeContents(true)

docsVectorIndex := simplevectorindex.New("docs", ".", openaiEmbedder)
indexIsEmpty, _ := docsVectorIndex.IsEmpty()
indexIsEmpty, _ := index.IsEmpty(context.Background())

if indexIsEmpty {
err := ingestData(docsVectorIndex, openaiEmbedder)
err := ingestData(index)
if err != nil {
panic(err)
}
}

query := "What is the purpose of the NATO Alliance?"
similarities, err := docsVectorIndex.Query(
similarities, err := index.Query(
context.Background(),
query,
indexoption.WithTopK(3),
Expand All @@ -52,7 +54,7 @@ func main() {
documentContext += similarity.Content() + "\n\n"
}

llmOpenAI := openai.NewCompletion()
llmOpenAI := openai.NewCompletion().WithVerbose(true)
prompt1 := prompt.NewPromptTemplate(
"Based on the following context answer to the question.\n\nContext:\n{{.context}}\n\nQuestion: {{.query}}").WithInputs(
map[string]string{
Expand All @@ -74,7 +76,7 @@ func main() {
fmt.Println(output)
}

func ingestData(docsVectorIndex *simplevectorindex.Index, openaiEmbedder index.Embedder) error {
func ingestData(index *index.Index) error {

fmt.Printf("Ingesting data...")

Expand All @@ -87,7 +89,7 @@ func ingestData(docsVectorIndex *simplevectorindex.Index, openaiEmbedder index.E

documentChunks := textSplitter.SplitDocuments(documents)

err = docsVectorIndex.LoadFromDocuments(context.Background(), documentChunks)
err = index.LoadFromDocuments(context.Background(), documentChunks)
if err != nil {
return err
}
Expand Down
22 changes: 13 additions & 9 deletions examples/embeddings/knowledge_base/main.go
Expand Up @@ -8,8 +8,10 @@ import (

"github.com/henomis/lingoose/chat"
openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"

"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -23,13 +25,15 @@ const (

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index := index.New(
jsondb.New("db.json"),
openaiembedder.New(openaiembedder.AdaEmbeddingV2),
).WithIncludeContents(true)

docsVectorIndex := simplevectorindex.New("db", ".", openaiEmbedder)
indexIsEmpty, _ := docsVectorIndex.IsEmpty()
indexIsEmpty, _ := index.IsEmpty(context.Background())

if indexIsEmpty {
err := ingestData(docsVectorIndex)
err := ingestData(index)
if err != nil {
panic(err)
}
Expand All @@ -49,7 +53,7 @@ func main() {
break
}

similarities, err := docsVectorIndex.Query(context.Background(), query, indexoption.WithTopK(3))
similarities, err := index.Query(context.Background(), query, indexoption.WithTopK(3))
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -98,11 +102,11 @@ func main() {

}

func ingestData(docsVectorIndex *simplevectorindex.Index) error {
func ingestData(index *index.Index) error {

fmt.Printf("Learning Knowledge Base...")

loader := loader.NewPDFToTextLoader("./kb")
loader := loader.NewPDFToTextLoader("./kb").WithPDFToTextPath("/opt/homebrew/bin/pdftotext")

documents, err := loader.Load(context.Background())
if err != nil {
Expand All @@ -113,7 +117,7 @@ func ingestData(docsVectorIndex *simplevectorindex.Index) error {

documentChunks := textSplitter.SplitDocuments(documents)

err = docsVectorIndex.LoadFromDocuments(context.Background(), documentChunks)
err = index.LoadFromDocuments(context.Background(), documentChunks)
if err != nil {
return err
}
Expand Down
42 changes: 21 additions & 21 deletions examples/embeddings/pinecone/main.go
Expand Up @@ -5,8 +5,9 @@ import (
"fmt"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
pineconeindex "github.com/henomis/lingoose/index/pinecone"
pineconedb "github.com/henomis/lingoose/index/vectordb/pinecone"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -17,37 +18,36 @@ import (

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

pineconeIndex := pineconeindex.New(
pineconeindex.Options{
IndexName: "test",
Namespace: "test-namespace",
IncludeContent: true,
CreateIndex: &pineconeindex.CreateIndexOptions{
Dimension: 1536,
Replicas: 1,
Metric: "cosine",
PodType: "p1.x1",
index := index.New(
pineconedb.New(
pineconedb.Options{
IndexName: "test",
Namespace: "test-namespace",
CreateIndexOptions: &pineconedb.CreateIndexOptions{
Dimension: 1536,
Replicas: 1,
Metric: "cosine",
PodType: "p1.x1",
},
},
},
openaiEmbedder,
)
),
openaiembedder.New(openaiembedder.AdaEmbeddingV2),
).WithIncludeContents(true)

indexIsEmpty, err := pineconeIndex.IsEmpty(context.Background())
indexIsEmpty, err := index.IsEmpty(context.Background())
if err != nil {
panic(err)
}

if indexIsEmpty {
err = ingestData(pineconeIndex)
err = ingestData(index)
if err != nil {
panic(err)
}
}

query := "What is the purpose of the NATO Alliance?"
similarities, err := pineconeIndex.Query(
similarities, err := index.Query(
context.Background(),
query,
indexoption.WithTopK(3),
Expand Down Expand Up @@ -88,7 +88,7 @@ func main() {

}

func ingestData(pineconeIndex *pineconeindex.Index) error {
func ingestData(index *index.Index) error {

documents, err := loader.NewDirectoryLoader(".", ".txt").Load(context.Background())
if err != nil {
Expand All @@ -108,6 +108,6 @@ func ingestData(pineconeIndex *pineconeindex.Index) error {

}

return pineconeIndex.LoadFromDocuments(context.Background(), documentChunks)
return index.LoadFromDocuments(context.Background(), documentChunks)

}
35 changes: 18 additions & 17 deletions examples/embeddings/qdrant/main.go
Expand Up @@ -5,8 +5,9 @@ import (
"fmt"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
indexoption "github.com/henomis/lingoose/index/option"
qdrantindex "github.com/henomis/lingoose/index/qdrant"
qdrantdb "github.com/henomis/lingoose/index/vectordb/qdrant"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
"github.com/henomis/lingoose/prompt"
Expand All @@ -18,34 +19,34 @@ import (

func main() {

openaiEmbedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)

qdrantIndex := qdrantindex.New(
qdrantindex.Options{
CollectionName: "test",
IncludeContent: true,
CreateCollection: &qdrantindex.CreateCollectionOptions{
Dimension: 1536,
Distance: qdrantindex.DistanceCosine,
index := index.New(
qdrantdb.New(
qdrantdb.Options{
CollectionName: "test",
IncludeContent: true,
CreateCollection: &qdrantdb.CreateCollectionOptions{
Dimension: 1536,
Distance: qdrantdb.DistanceCosine,
},
},
},
openaiEmbedder,
).WithAPIKeyAndEdpoint("", "http://localhost:6333")
).WithAPIKeyAndEdpoint("", "http://localhost:6333"),
openaiembedder.New(openaiembedder.AdaEmbeddingV2),
).WithIncludeContents(true)

indexIsEmpty, err := qdrantIndex.IsEmpty(context.Background())
indexIsEmpty, err := index.IsEmpty(context.Background())
if err != nil {
panic(err)
}

if indexIsEmpty {
err = ingestData(qdrantIndex)
err = ingestData(index)
if err != nil {
panic(err)
}
}

query := "What is the purpose of the NATO Alliance?"
similarities, err := qdrantIndex.Query(
similarities, err := index.Query(
context.Background(),
query,
indexoption.WithTopK(3),
Expand Down Expand Up @@ -86,7 +87,7 @@ func main() {

}

func ingestData(qdrantIndex *qdrantindex.Index) error {
func ingestData(qdrantIndex *index.Index) error {

documents, err := loader.NewDirectoryLoader(".", ".txt").Load(context.Background())
if err != nil {
Expand Down
5 changes: 3 additions & 2 deletions examples/embeddings/simplekb/main.go
Expand Up @@ -4,8 +4,9 @@ import (
"context"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
"github.com/henomis/lingoose/index"
"github.com/henomis/lingoose/index/option"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/openai"
"github.com/henomis/lingoose/loader"
qapipeline "github.com/henomis/lingoose/pipeline/qa"
Expand All @@ -14,7 +15,7 @@ import (

// main is a compact question-answering example: it loads documents from
// "./kb" through a PDF-to-text loader (with a recursive character text
// splitter constructed with 2000 and 200 — presumably chunk size and overlap,
// TODO confirm), loads them into an index built on an OpenAI Ada v2 embedder,
// and issues one query through the QA pipeline.
//
// NOTE(review): the error from Load is discarded with `_`, and the return
// values of LoadFromDocuments and Query are not checked at all.
func main() {
docs, _ := loader.NewPDFToTextLoader("./kb").WithTextSplitter(textsplitter.NewRecursiveCharacterTextSplitter(2000, 200)).Load(context.Background())
// NOTE(review): the next two lines both declare `index` with `:=`. This looks
// like the removed/added pair of a rendered diff (simplevectorindex versus the
// jsondb-backed index.New) rather than real consecutive statements — confirm
// which one belongs in the actual file.
index := simplevectorindex.New("db", ".", openaiembedder.New(openaiembedder.AdaEmbeddingV2))
index := index.New(jsondb.New("db.json"), openaiembedder.New(openaiembedder.AdaEmbeddingV2)).WithIncludeContents(true)
// Feed the loaded documents into the index.
index.LoadFromDocuments(context.Background(), docs)
// Run a single query against the index with TopK(1); the result is not used here.
qapipeline.New(openai.NewChat().WithVerbose(true)).WithIndex(index).Query(context.Background(), "What is the NATO purpose?", option.WithTopK(1))
}
8 changes: 6 additions & 2 deletions examples/llm/cache/main.go
Expand Up @@ -8,15 +8,19 @@ import (
"strings"

openaiembedder "github.com/henomis/lingoose/embedder/openai"
simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
"github.com/henomis/lingoose/index"
"github.com/henomis/lingoose/index/vectordb/jsondb"
"github.com/henomis/lingoose/llm/cache"
"github.com/henomis/lingoose/llm/openai"
)

func main() {

embedder := openaiembedder.New(openaiembedder.AdaEmbeddingV2)
index := simplevectorindex.New("db", ".", embedder)
index := index.New(
jsondb.New("db.json"),
embedder,
)
llm := openai.NewCompletion().WithCompletionCache(cache.New(embedder, index).WithTopK(3))

for {
Expand Down

0 comments on commit e463f6a

Please sign in to comment.