Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add drop and delete methods #153

Merged
merged 2 commits into from Nov 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions go.mod
Expand Up @@ -12,7 +12,7 @@ require (
github.com/henomis/cohere-go v1.0.1
github.com/henomis/milvus-go v0.0.4
github.com/henomis/pinecone-go v1.1.2
github.com/henomis/qdrant-go v1.0.2
github.com/henomis/qdrant-go v1.1.0
github.com/invopop/jsonschema v0.7.0
github.com/pkoukk/tiktoken-go v0.1.1
github.com/sashabaranov/go-openai v1.12.0
Expand All @@ -21,6 +21,6 @@ require (
require (
github.com/dlclark/regexp2 v1.8.1 // indirect
github.com/gomodule/redigo v1.8.9 // indirect
github.com/henomis/restclientgo v1.0.6 // indirect
github.com/henomis/restclientgo v1.1.0 // indirect
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0 // indirect
)
8 changes: 4 additions & 4 deletions go.sum
Expand Up @@ -15,10 +15,10 @@ github.com/henomis/milvus-go v0.0.4 h1:ArddXRJx/EGdQ75gB7TyEzD4Z4BqXzW16p8jRcjJO
github.com/henomis/milvus-go v0.0.4/go.mod h1:nZ/NvDOLoGl7FQrYSm0JfeefPBVXph9PpE4y3lPpbj4=
github.com/henomis/pinecone-go v1.1.2 h1:hOEhk/WAT7r3ini12fyyoAIT65nzeqmWG8t4YObvOZ8=
github.com/henomis/pinecone-go v1.1.2/go.mod h1:0u2hta1zssq/aaozgS6Nn61CMKlA95Pg6mVsE8zMNus=
github.com/henomis/qdrant-go v1.0.2 h1:fxLfkPsaKqaI9pJ92AuWWWQo5sZEy4ZPrhEse1m5eog=
github.com/henomis/qdrant-go v1.0.2/go.mod h1:gKGGarpdiYg8CNsPNBxKfBg03hbfYy+K3hRatcZ/lUE=
github.com/henomis/restclientgo v1.0.6 h1:sKTHmmG2D37TX4ZTInDDM7dNbT6qn8E3CQdTSTCbAq0=
github.com/henomis/restclientgo v1.0.6/go.mod h1:xIeTCu2ZstvRn0fCukNpzXLN3m/kRTU0i0RwAbv7Zug=
github.com/henomis/qdrant-go v1.1.0 h1:GU5R4ZZKeD0JXLZ0yv37FUzI0spGECRr2WlE3prmLIY=
github.com/henomis/qdrant-go v1.1.0/go.mod h1:p274sBhQPDnfjCzJentWoNIGFadl5yIcLM4Xnmmwk9k=
github.com/henomis/restclientgo v1.1.0 h1:qNhBpTwYXuwfy6SL2EWjbfTrlUw70PZYmqQ4jewdu5E=
github.com/henomis/restclientgo v1.1.0/go.mod h1:xIeTCu2ZstvRn0fCukNpzXLN3m/kRTU0i0RwAbv7Zug=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0 h1:i462o439ZjprVSFSZLZxcsoAe592sZB1rci2Z8j4wdk=
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
github.com/invopop/jsonschema v0.7.0 h1:2vgQcBz1n256N+FpX3Jq7Y17AjYt46Ig3zIWyy770So=
Expand Down
2 changes: 2 additions & 0 deletions index/index.go
Expand Up @@ -40,6 +40,8 @@ type VectorDB interface {
Insert(context.Context, []Data) error
IsEmpty(context.Context) (bool, error)
Search(context.Context, []float64, *option.Options) (SearchResults, error)
Drop(ctx context.Context) error
Delete(ctx context.Context, ids []string) error
}

type Index struct {
Expand Down
102 changes: 68 additions & 34 deletions index/vectordb/jsondb/jsondb.go
Expand Up @@ -16,6 +16,8 @@ import (
"github.com/henomis/lingoose/types"
)

// Compile-time assertion that *DB implements index.VectorDB.
var _ index.VectorDB = (*DB)(nil)

type data struct {
ID string `json:"id"`
Metadata types.Meta `json:"metadata"`
Expand All @@ -40,57 +42,57 @@ func New() *DB {
return index
}

func (i *DB) WithPersist(dbPath string) *DB {
i.dbPath = dbPath
return i
func (d *DB) WithPersist(dbPath string) *DB {
d.dbPath = dbPath
return d
}

func (i *DB) save() error {
if i.dbPath == "" {
func (d *DB) save() error {
if d.dbPath == "" {
return nil
}

jsonContent, err := json.Marshal(i.data)
jsonContent, err := json.Marshal(d.data)
if err != nil {
return err
}

return os.WriteFile(i.dbPath, jsonContent, 0600)
return os.WriteFile(d.dbPath, jsonContent, 0600)
}

func (i *DB) load() error {
if i.dbPath == "" {
func (d *DB) load() error {
if d.dbPath == "" {
return nil
}

if len(i.data) > 0 {
if len(d.data) > 0 {
return nil
}

if _, err := os.Stat(i.dbPath); os.IsNotExist(err) {
return i.save()
if _, err := os.Stat(d.dbPath); os.IsNotExist(err) {
return d.save()
}

content, err := os.ReadFile(i.dbPath)
content, err := os.ReadFile(d.dbPath)
if err != nil {
return err
}

return json.Unmarshal(content, &i.data)
return json.Unmarshal(content, &d.data)
}

func (i *DB) IsEmpty(_ context.Context) (bool, error) {
err := i.load()
func (d *DB) IsEmpty(_ context.Context) (bool, error) {
err := d.load()
if err != nil {
return true, fmt.Errorf("%w: %w", index.ErrInternal, err)
}

return len(i.data) == 0, nil
return len(d.data) == 0, nil
}

func (i *DB) Insert(ctx context.Context, datas []index.Data) error {
func (d *DB) Insert(ctx context.Context, datas []index.Data) error {
_ = ctx
err := i.load()
err := d.load()
if err != nil {
return fmt.Errorf("%w: %w", index.ErrInternal, err)
}
Expand All @@ -113,21 +115,53 @@ func (i *DB) Insert(ctx context.Context, datas []index.Data) error {
records = append(records, point)
}

i.data = append(i.data, records...)
d.data = append(d.data, records...)

return i.save()
return d.save()
}

func (i *DB) Search(ctx context.Context, values []float64, options *option.Options) (index.SearchResults, error) {
err := i.load()
func (d *DB) Search(ctx context.Context, values []float64, options *option.Options) (index.SearchResults, error) {
err := d.load()
if err != nil {
return nil, fmt.Errorf("%w: %w", index.ErrInternal, err)
}

return i.similaritySearch(ctx, values, options)
return d.similaritySearch(ctx, values, options)
}

// Drop replaces the in-memory records with an empty, non-nil slice and then
// calls save. The context argument is accepted only for the method signature
// and is not used.
func (d *DB) Drop(_ context.Context) error {
	d.data = make([]data, 0)

	return d.save()
}

// Delete removes every record whose ID appears in ids and then calls save.
//
// The records are loaded first via load; a load failure is returned wrapped
// in index.ErrInternal. IDs that match no record are ignored, and an empty ids
// slice leaves the records untouched. The context argument is not used.
func (d *DB) Delete(ctx context.Context, ids []string) error {
	_ = ctx
	if err := d.load(); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	// Index the IDs once so each record costs a single map lookup instead of a
	// scan over ids.
	toDelete := make(map[string]struct{}, len(ids))
	for _, id := range ids {
		toDelete[id] = struct{}{}
	}

	// Start from a non-nil slice: a nil slice is marshalled as JSON null
	// rather than [] when no record survives, which would differ from the
	// empty slice that Drop stores.
	kept := make([]data, 0, len(d.data))
	for _, record := range d.data {
		if _, remove := toDelete[record.ID]; remove {
			continue
		}
		kept = append(kept, record)
	}

	d.data = kept

	return d.save()
}

func (i *DB) similaritySearch(
func (d *DB) similaritySearch(
_ context.Context,
embedding embedder.Embedding,
opts *option.Options,
Expand All @@ -136,7 +170,7 @@ func (i *DB) similaritySearch(
opts = index.GetDefaultOptions()
}

scores, err := i.cosineSimilarityBatch(embedding)
scores, err := d.cosineSimilarityBatch(embedding)
if err != nil {
return nil, fmt.Errorf("%w: %w", index.ErrInternal, err)
}
Expand All @@ -146,9 +180,9 @@ func (i *DB) similaritySearch(
for j, score := range scores {
searchResults[j] = index.SearchResult{
Data: index.Data{
ID: i.data[j].ID,
Values: i.data[j].Values,
Metadata: i.data[j].Metadata,
ID: d.data[j].ID,
Values: d.data[j].Values,
Metadata: d.data[j].Metadata,
},
Score: score,
}
Expand All @@ -161,7 +195,7 @@ func (i *DB) similaritySearch(
return filterSearchResults(searchResults, opts.TopK), nil
}

func (i *DB) cosineSimilarity(a []float64, b []float64) (cosine float64, err error) {
func (d *DB) cosineSimilarity(a []float64, b []float64) (cosine float64, err error) {
var count int
lengthA := len(a)
lengthB := len(b)
Expand Down Expand Up @@ -192,12 +226,12 @@ func (i *DB) cosineSimilarity(a []float64, b []float64) (cosine float64, err err
return sumA / (math.Sqrt(s1) * math.Sqrt(s2)), nil
}

func (i *DB) cosineSimilarityBatch(a embedder.Embedding) ([]float64, error) {
func (d *DB) cosineSimilarityBatch(a embedder.Embedding) ([]float64, error) {
var err error
scores := make([]float64, len(i.data))
scores := make([]float64, len(d.data))

for j := range i.data {
scores[j], err = i.cosineSimilarity(a, i.data[j].Values)
for j := range d.data {
scores[j], err = d.cosineSimilarity(a, d.data[j].Values)
if err != nil {
return nil, err
}
Expand Down
45 changes: 45 additions & 0 deletions index/vectordb/milvus/milvus.go
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"os"
"strconv"

"github.com/henomis/lingoose/index"
"github.com/henomis/lingoose/index/option"
Expand All @@ -14,6 +15,8 @@ import (
milvusgoresponse "github.com/henomis/milvus-go/response"
)

// Compile-time assertion that *DB implements index.VectorDB.
var _ index.VectorDB = (*DB)(nil)

type DB struct {
milvusClient *milvusgo.Client
databaseName *string
Expand Down Expand Up @@ -98,6 +101,48 @@ func (d *DB) Search(ctx context.Context, values []float64, options *option.Optio
return buildSearchResultsFromMilvusMatches(matches), nil
}

// Drop issues a CollectionDrop request for d.collectionName through the
// Milvus client. A client error is returned wrapped in index.ErrInternal.
func (d *DB) Drop(ctx context.Context) error {
	request := &milvusgorequest.CollectionDrop{
		CollectionName: d.collectionName,
	}
	response := &milvusgoresponse.CollectionDrop{}

	if err := d.milvusClient.CollectionDrop(ctx, request, response); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	return nil
}

// Delete removes the vectors identified by ids from d.collectionName.
//
// Every ID is parsed as a base-10 unsigned 64-bit integer before the request
// is sent; the first ID that fails to parse aborts the call without
// contacting Milvus. Parse and client errors are both returned wrapped in
// index.ErrInternal.
func (d *DB) Delete(ctx context.Context, ids []string) error {
	numericIDs := make([]uint64, 0, len(ids))
	for _, rawID := range ids {
		parsed, parseErr := strconv.ParseUint(rawID, 10, 64)
		if parseErr != nil {
			return fmt.Errorf("%w: %w", index.ErrInternal, parseErr)
		}

		numericIDs = append(numericIDs, parsed)
	}

	request := &milvusgorequest.VectorDelete{
		CollectionName: d.collectionName,
		ID:             numericIDs,
	}
	response := &milvusgoresponse.VectorDelete{}

	if err := d.milvusClient.VectorDelete(ctx, request, response); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	return nil
}

func (d *DB) similaritySearch(
ctx context.Context,
values []float64,
Expand Down
35 changes: 35 additions & 0 deletions index/vectordb/pinecone/pinecone.go
Expand Up @@ -14,6 +14,8 @@ import (
pineconegoresponse "github.com/henomis/pinecone-go/response"
)

// Compile-time assertion that *DB implements index.VectorDB.
var _ index.VectorDB = (*DB)(nil)

type DB struct {
pineconeClient *pineconego.PineconeGo
indexName string
Expand Down Expand Up @@ -102,6 +104,39 @@ func (d *DB) Search(ctx context.Context, values []float64, options *option.Optio
return buildSearchResultsFromPineconeMatches(matches), nil
}

// Drop issues an IndexDelete request for d.indexName through the Pinecone
// client. A client error is returned wrapped in index.ErrInternal.
func (d *DB) Drop(ctx context.Context) error {
	request := &pineconegorequest.IndexDelete{IndexName: d.indexName}
	response := &pineconegoresponse.IndexDelete{}

	if err := d.pineconeClient.IndexDelete(ctx, request, response); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	return nil
}

// Delete removes the vectors identified by ids from d.indexName, sending one
// VectorDelete request per ID.
//
// getProjectID is called first, and d.projectID is dereferenced for every
// request. The loop stops at the first failing request, so IDs processed
// before the failure have already been submitted for deletion. All errors
// are returned wrapped in index.ErrInternal.
func (d *DB) Delete(ctx context.Context, ids []string) error {
	if err := d.getProjectID(ctx); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	for _, id := range ids {
		// Per-iteration copy so the request never points at the loop variable.
		current := id

		request := &pineconegorequest.VectorDelete{
			IndexName: d.indexName,
			ProjectID: *d.projectID,
			ID:        &current,
		}

		err := d.pineconeClient.VectorDelete(ctx, request, &pineconegoresponse.VectorDelete{})
		if err != nil {
			return fmt.Errorf("%w: %w", index.ErrInternal, err)
		}
	}

	return nil
}

func (d *DB) similaritySearch(
ctx context.Context,
values []float64,
Expand Down
24 changes: 24 additions & 0 deletions index/vectordb/postgres/postgres.go
Expand Up @@ -14,6 +14,8 @@ import (
"github.com/henomis/lingoose/types"
)

// Compile-time assertion that *DB implements index.VectorDB.
var _ index.VectorDB = (*DB)(nil)

type DB struct {
db *sql.DB
table string
Expand Down Expand Up @@ -114,6 +116,28 @@ func (d *DB) Search(ctx context.Context, values []float64, options *option.Optio
return d.similaritySearch(ctx, values, options)
}

// Drop executes "DROP TABLE IF EXISTS" for d.table. The table name is
// interpolated into the statement text, not bound as a parameter. An
// execution error is returned wrapped in index.ErrInternal.
func (d *DB) Drop(ctx context.Context) error {
	statement := fmt.Sprintf("DROP TABLE IF EXISTS %s", d.table)

	if _, err := d.db.ExecContext(ctx, statement); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	return nil
}

// Delete removes the rows of d.table whose id matches any element of ids,
// using a single "id = ANY($1)" statement. The table name is interpolated
// into the statement text; ids is bound as the sole query parameter. An
// execution error is returned wrapped in index.ErrInternal.
func (d *DB) Delete(ctx context.Context, ids []string) error {
	statement := fmt.Sprintf("DELETE FROM %s WHERE id = ANY($1)", d.table)

	// NOTE(review): ids is handed to the driver as a plain []string. Whether
	// that is accepted as an array parameter depends on the SQL driver in use
	// — confirm against the driver registered for d.db.
	if _, err := d.db.ExecContext(ctx, statement, ids); err != nil {
		return fmt.Errorf("%w: %w", index.ErrInternal, err)
	}

	return nil
}

func (d *DB) similaritySearch(
ctx context.Context,
values []float64,
Expand Down