Skip to content

Commit

Permalink
chore: refactor float64 embeddings (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
henomis committed May 2, 2023
1 parent 8f015e4 commit d9146ff
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 31 deletions.
5 changes: 1 addition & 4 deletions embedder/embedding.go
Expand Up @@ -5,7 +5,4 @@ var (
)

// Embedding is the result of an embedding operation.
type Embedding struct {
Embedding []float32 `json:"embedding"`
Index int `json:"index"`
}
type Embedding []float64
13 changes: 9 additions & 4 deletions embedder/openai/openai.go
Expand Up @@ -71,11 +71,16 @@ func (t *openAIEmbedder) Embed(ctx context.Context, docs []document.Document) ([
var embeddings []embedder.Embedding

for _, obj := range resp.Data {
embeddings = append(embeddings, embedder.Embedding{
Embedding: obj.Embedding,
Index: obj.Index,
})
embeddings = append(embeddings, float32ToFloat64(obj.Embedding))
}

return embeddings, nil
}

// float32ToFloat64 widens every element of slice to float64 and returns the
// results in a newly allocated slice of the same length. The input slice is
// not modified.
func float32ToFloat64(slice []float32) []float64 {
	widened := make([]float64, 0, len(slice))
	for _, value := range slice {
		widened = append(widened, float64(value))
	}
	return widened
}
4 changes: 2 additions & 2 deletions go.mod
Expand Up @@ -6,8 +6,8 @@ require github.com/mitchellh/mapstructure v1.5.0

require (
github.com/google/uuid v1.3.0
github.com/henomis/pinecone-go v1.0.3
github.com/sashabaranov/go-openai v1.9.0
github.com/henomis/pinecone-go v1.1.0
github.com/sashabaranov/go-openai v1.9.1
)

require github.com/henomis/restclientgo v1.0.3 // indirect
8 changes: 4 additions & 4 deletions go.sum
@@ -1,10 +1,10 @@
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/henomis/pinecone-go v1.0.3 h1:mhWH2IlfdjZW3rIxz23JHZ3HRUiQaoL5DqVcGq3HnOQ=
github.com/henomis/pinecone-go v1.0.3/go.mod h1:FsMMRjLyiJ9zHqGOlmGvjolqOp2kkbMsRm8oc85vykU=
github.com/henomis/pinecone-go v1.1.0 h1:VuiJxTY0wQeUh5e4eD+w30/wKl6KeKYD6292UlsWaOs=
github.com/henomis/pinecone-go v1.1.0/go.mod h1:FsMMRjLyiJ9zHqGOlmGvjolqOp2kkbMsRm8oc85vykU=
github.com/henomis/restclientgo v1.0.3 h1:y5+ydfvWJ0/7crObdnCHSn7ya/h1whD+PV4Ir2dZ9Ig=
github.com/henomis/restclientgo v1.0.3/go.mod h1:xIeTCu2ZstvRn0fCukNpzXLN3m/kRTU0i0RwAbv7Zug=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/sashabaranov/go-openai v1.9.0 h1:NoiO++IISxxJ1pRc0n7uZvMGMake0G+FJ1XPwXtprsA=
github.com/sashabaranov/go-openai v1.9.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/sashabaranov/go-openai v1.9.1 h1:3N52HkJKo9Zlo/oe1AVv5ZkCOny0ra58/ACvAxkN3MM=
github.com/sashabaranov/go-openai v1.9.1/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
2 changes: 1 addition & 1 deletion index/index.go
Expand Up @@ -20,7 +20,7 @@ const (
type SearchResponse struct {
ID string
Document document.Document
Score float32
Score float64
}

type Embedder interface {
Expand Down
6 changes: 3 additions & 3 deletions index/pinecone.go
Expand Up @@ -132,7 +132,7 @@ func (p *pinecone) similaritySearch(ctx context.Context, topK *int, query string
IndexName: p.indexName,
ProjectID: p.projectID,
TopK: int32(pineconeTopK),
Vector: embeddings[0].Embedding,
Vector: embeddings[0],
IncludeMetadata: &includeMetadata,
Namespace: &p.namespace,
},
Expand Down Expand Up @@ -228,7 +228,7 @@ func buildVectorsFromEmbeddingsAndDocuments(

vectors = append(vectors, pineconerequest.Vector{
ID: vectorID.String(),
Values: embedding.Embedding,
Values: embedding,
Metadata: metadata,
})

Expand Down Expand Up @@ -258,7 +258,7 @@ func buildSearchReponsesFromMatches(matches []pineconeresponse.QueryMatch, inclu
id = *match.ID
}

score := float32(0)
score := float64(0)
if match.Score != nil {
score = *match.Score
}
Expand Down
26 changes: 13 additions & 13 deletions index/simpleVectorIndex.go
Expand Up @@ -142,27 +142,27 @@ func (s *simpleVectorIndex) SimilaritySearch(ctx context.Context, query string,
return filterSearchResponses(searchResponses, topK), nil
}

// cosineSimilarity returns the cosine similarity of a and b: their dot
// product divided by the product of their Euclidean norms. It returns 0
// when either vector has a zero norm, which avoids dividing by zero.
//
// NOTE(review): the loop is bounded by len(a) but also indexes b, so b is
// assumed to be at least as long as a — confirm against callers.
func (s *simpleVectorIndex) cosineSimilarity(a embedder.Embedding, b embedder.Embedding) float32 {
dotProduct := float32(0.0)
normA := float32(0.0)
normB := float32(0.0)

for i := 0; i < len(a.Embedding); i++ {
dotProduct += a.Embedding[i] * b.Embedding[i]
normA += a.Embedding[i] * a.Embedding[i]
normB += b.Embedding[i] * b.Embedding[i]
func (s *simpleVectorIndex) cosineSimilarity(a embedder.Embedding, b embedder.Embedding) float64 {
dotProduct := float64(0.0)
normA := float64(0.0)
normB := float64(0.0)

// Accumulate the dot product and both squared norms in a single pass.
for i := 0; i < len(a); i++ {
dotProduct += a[i] * b[i]
normA += a[i] * a[i]
normB += b[i] * b[i]
}

if normA == 0 || normB == 0 {
return float32(0.0)
return float64(0.0)
}

return dotProduct / (float32(math.Sqrt(float64(normA))) * float32(math.Sqrt(float64(normB))))
return dotProduct / (math.Sqrt(normA) * math.Sqrt(normB))
}

func (s *simpleVectorIndex) cosineSimilarityBatch(a embedder.Embedding) []float32 {
func (s *simpleVectorIndex) cosineSimilarityBatch(a embedder.Embedding) []float64 {

scores := make([]float32, len(s.data))
scores := make([]float64, len(s.data))

for i := range s.data {
scores[i] = s.cosineSimilarity(a, s.data[i].Embedding)
Expand Down

0 comments on commit d9146ff

Please sign in to comment.