Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactor float64 embeddings #42

Merged
merged 1 commit into from May 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 1 addition & 4 deletions embedder/embedding.go
Expand Up @@ -5,7 +5,4 @@ var (
)

// Embedding is the result of an embedding operation.
type Embedding struct {
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
}
type Embedding []float64
13 changes: 9 additions & 4 deletions embedder/openai/openai.go
Expand Up @@ -71,11 +71,16 @@ func (t *openAIEmbedder) Embed(ctx context.Context, docs []document.Document) ([
var embeddings []embedder.Embedding

for _, obj := range resp.Data {
embeddings = append(embeddings, embedder.Embedding{
Embedding: obj.Embedding,
Index: obj.Index,
})
embeddings = append(embeddings, float32ToFloat64(obj.Embedding))
}

return embeddings, nil
}

// float32ToFloat64 widens every element of slice to float64 and returns the
// converted values, in the same order, in a newly allocated slice of the same
// length. The input slice is left untouched.
func float32ToFloat64(slice []float32) []float64 {
	widened := make([]float64, 0, len(slice))
	for _, value := range slice {
		widened = append(widened, float64(value))
	}
	return widened
}
4 changes: 2 additions & 2 deletions go.mod
Expand Up @@ -6,8 +6,8 @@ require github.com/mitchellh/mapstructure v1.5.0

require (
github.com/google/uuid v1.3.0
github.com/henomis/pinecone-go v1.0.3
github.com/sashabaranov/go-openai v1.9.0
github.com/henomis/pinecone-go v1.1.0
github.com/sashabaranov/go-openai v1.9.1
)

require github.com/henomis/restclientgo v1.0.3 // indirect
8 changes: 4 additions & 4 deletions go.sum
@@ -1,10 +1,10 @@
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/henomis/pinecone-go v1.0.3 h1:mhWH2IlfdjZW3rIxz23JHZ3HRUiQaoL5DqVcGq3HnOQ=
github.com/henomis/pinecone-go v1.0.3/go.mod h1:FsMMRjLyiJ9zHqGOlmGvjolqOp2kkbMsRm8oc85vykU=
github.com/henomis/pinecone-go v1.1.0 h1:VuiJxTY0wQeUh5e4eD+w30/wKl6KeKYD6292UlsWaOs=
github.com/henomis/pinecone-go v1.1.0/go.mod h1:FsMMRjLyiJ9zHqGOlmGvjolqOp2kkbMsRm8oc85vykU=
github.com/henomis/restclientgo v1.0.3 h1:y5+ydfvWJ0/7crObdnCHSn7ya/h1whD+PV4Ir2dZ9Ig=
github.com/henomis/restclientgo v1.0.3/go.mod h1:xIeTCu2ZstvRn0fCukNpzXLN3m/kRTU0i0RwAbv7Zug=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
2 changes: 1 addition & 1 deletion index/index.go
Expand Up @@ -20,7 +20,7 @@ const (
type SearchResponse struct {
ID string
Document document.Document
Score float32
Score float64
}

type Embedder interface {
Expand Down
6 changes: 3 additions & 3 deletions index/pinecone.go
Expand Up @@ -132,7 +132,7 @@ func (p *pinecone) similaritySearch(ctx context.Context, topK *int, query string
IndexName: p.indexName,
ProjectID: p.projectID,
TopK: int32(pineconeTopK),
Vector: embeddings[0].Embedding,
Vector: embeddings[0],
IncludeMetadata: &includeMetadata,
Namespace: &p.namespace,
},
Expand Down Expand Up @@ -228,7 +228,7 @@ func buildVectorsFromEmbeddingsAndDocuments(

vectors = append(vectors, pineconerequest.Vector{
ID: vectorID.String(),
Values: embedding.Embedding,
Values: embedding,
Metadata: metadata,
})

Expand Down Expand Up @@ -258,7 +258,7 @@ func buildSearchReponsesFromMatches(matches []pineconeresponse.QueryMatch, inclu
id = *match.ID
}

score := float32(0)
score := float64(0)
if match.Score != nil {
score = *match.Score
}
Expand Down
26 changes: 13 additions & 13 deletions index/simpleVectorIndex.go
Expand Up @@ -142,27 +142,27 @@ func (s *simpleVectorIndex) SimilaritySearch(ctx context.Context, query string,
return filterSearchResponses(searchResponses, topK), nil
}

func (s *simpleVectorIndex) cosineSimilarity(a embedder.Embedding, b embedder.Embedding) float32 {
dotProduct := float32(0.0)
normA := float32(0.0)
normB := float32(0.0)

for i := 0; i < len(a.Embedding); i++ {
dotProduct += a.Embedding[i] * b.Embedding[i]
normA += a.Embedding[i] * a.Embedding[i]
normB += b.Embedding[i] * b.Embedding[i]
func (s *simpleVectorIndex) cosineSimilarity(a embedder.Embedding, b embedder.Embedding) float64 {
dotProduct := float64(0.0)
normA := float64(0.0)
normB := float64(0.0)

for i := 0; i < len(a); i++ {
dotProduct += a[i] * b[i]
normA += a[i] * a[i]
normB += b[i] * b[i]
}

if normA == 0 || normB == 0 {
return float32(0.0)
return float64(0.0)
}

return dotProduct / (float32(math.Sqrt(float64(normA))) * float32(math.Sqrt(float64(normB))))
return dotProduct / (math.Sqrt(normA) * math.Sqrt(normB))
}

func (s *simpleVectorIndex) cosineSimilarityBatch(a embedder.Embedding) []float32 {
func (s *simpleVectorIndex) cosineSimilarityBatch(a embedder.Embedding) []float64 {

scores := make([]float32, len(s.data))
scores := make([]float64, len(s.data))

for i := range s.data {
scores[i] = s.cosineSimilarity(a, s.data[i].Embedding)
Expand Down