Skip to content

Commit

Permalink
feat: add HF speech recognition loader (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
henomis committed Jun 1, 2023
1 parent ce311aa commit 891315d
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 0 deletions.
21 changes: 21 additions & 0 deletions examples/loader/hf_speech_recognition/main.go
@@ -0,0 +1,21 @@
package main

import (
"context"
"fmt"

"github.com/henomis/lingoose/loader"
)

// main is a small example: it builds a Hugging Face speech recognition
// loader for /tmp/hello.mp3, loads it, and prints the content of the first
// returned document. A load failure aborts the program with a panic.
func main() {
	speechLoader := loader.NewHFSpeechRecognitionLoader("/tmp/hello.mp3")

	documents, loadErr := speechLoader.Load(context.Background())
	if loadErr != nil {
		panic(loadErr)
	}

	fmt.Println(documents[0].Content)
}
87 changes: 87 additions & 0 deletions loader/hf_speech_recognition.go
@@ -0,0 +1,87 @@
package loader

import (
"context"
"encoding/json"
"fmt"
"os"
"strings"

"github.com/henomis/lingoose/document"
"github.com/henomis/lingoose/types"
)

// hfDefaultSpeechRecognitionModel is the model identifier a new loader uses
// until WithModel replaces it.
const hfDefaultSpeechRecognitionModel = "facebook/wav2vec2-large-960h-lv60-self"

// hfSpeechRecognition is a loader that turns a media file into documents by
// sending it to a Hugging Face speech recognition model.
type hfSpeechRecognition struct {
	// loader carries the shared loader state; Load consults its optional
	// text splitter.
	loader loader

	// mediaFile is the path of the media file to transcribe.
	mediaFile string
	// token is the Hugging Face token passed along with the request.
	token string
	// model is the identifier of the model asked to do the transcription.
	model string
}

// hfSpeechRecognitionResponse is the JSON payload decoded by Load; only the
// "text" field is read.
type hfSpeechRecognitionResponse struct {
	// Text is the transcription carried in the "text" field of the reply.
	Text string `json:"text,omitempty"`
}

// NewHFSpeechRecognitionLoader returns a loader for mediaFile that uses the
// default speech recognition model and the token read from the
// HUGGING_FACE_HUB_TOKEN environment variable. Both can be replaced
// afterwards with WithModel and WithToken.
func NewHFSpeechRecognitionLoader(mediaFile string) *hfSpeechRecognition {
	speechLoader := new(hfSpeechRecognition)
	speechLoader.mediaFile = mediaFile
	speechLoader.model = hfDefaultSpeechRecognitionModel
	speechLoader.token = os.Getenv("HUGGING_FACE_HUB_TOKEN")
	return speechLoader
}

// WithToken replaces the Hugging Face token stored on the loader and returns
// the same loader so calls can be chained.
func (h *hfSpeechRecognition) WithToken(token string) *hfSpeechRecognition {
	h.token = token

	return h
}

// WithModel replaces the model identifier stored on the loader and returns
// the same loader so calls can be chained.
func (h *hfSpeechRecognition) WithModel(model string) *hfSpeechRecognition {
	h.model = model

	return h
}

// WithTextSplitter sets the text splitter that Load applies to the
// transcription document and returns the same loader so calls can be chained.
func (h *hfSpeechRecognition) WithTextSplitter(textSplitter TextSplitter) *hfSpeechRecognition {
	h.loader.textSplitter = textSplitter

	return h
}

// Load transcribes the configured media file and returns the result as
// documents.
//
// The media path is first validated with isFile, then the file is submitted
// through hfMediaHttpCall together with the loader's token and model. The
// reply is decoded as JSON and its whitespace-trimmed "text" field becomes
// the content of a single document whose "source" metadata is the media file
// path. When a text splitter has been configured, that document is passed to
// SplitDocuments and the splitter's output is returned instead.
//
// Every failure is returned with the ErrorInternal text as prefix and the
// underlying error wrapped.
func (h *hfSpeechRecognition) Load(ctx context.Context) ([]document.Document, error) {
	if err := isFile(h.mediaFile); err != nil {
		return nil, fmt.Errorf("%s: %w", ErrorInternal, err)
	}

	responseBytes, err := hfMediaHttpCall(ctx, h.token, h.model, h.mediaFile)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", ErrorInternal, err)
	}

	var response hfSpeechRecognitionResponse
	if err := json.Unmarshal(responseBytes, &response); err != nil {
		return nil, fmt.Errorf("%s: %w", ErrorInternal, err)
	}

	// Named doc rather than document so the imported document package is
	// not shadowed for the rest of the function.
	doc := document.Document{
		Content: strings.TrimSpace(response.Text),
		Metadata: types.Meta{
			"source": h.mediaFile,
		},
	}
	documents := []document.Document{doc}

	if h.loader.textSplitter != nil {
		documents = h.loader.textSplitter.SplitDocuments(documents)
	}

	return documents, nil
}

0 comments on commit 891315d

Please sign in to comment.