/
SpeechToText.go
71 lines (64 loc) · 1.49 KB
/
SpeechToText.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package GoogleAPI
import (
"encoding/base64"
"net/http"
speech "google.golang.org/api/speech/v1beta1"
"io/ioutil"
)
// AudioConfig describes one audio file and the recognition settings that
// SpeechToText sends along with it.
//
// Only FilePath has no fallback. The remaining fields may be left at their
// zero value, in which case SpeechToText substitutes a default before
// building the request.
type AudioConfig struct {
	// FilePath is the path of the audio file that SpeechToText reads in full.
	FilePath string

	// LanguageCode is copied into the request's LanguageCode field.
	// SpeechToText uses "en-US" when it is empty.
	LanguageCode string

	// AudioEncoding is copied into the request's Encoding field.
	// SpeechToText uses "FLAC" when it is empty.
	AudioEncoding string

	// AudioSampleRate is copied into the request's SampleRate field
	// (presumably hertz; confirm against the Speech API reference).
	// SpeechToText uses 16000 when it is 0.
	AudioSampleRate int64
}
// SpeechToText reads the audio file at c.FilePath, submits it in a single
// Syncrecognize call through a speech service built on client, and returns
// the service's response serialized as a JSON string.
//
// Zero-valued settings in c are replaced with defaults first: "FLAC" for
// AudioEncoding, 16000 for AudioSampleRate and "en-US" for LanguageCode.
//
// On failure the result string is empty and the error is passed through
// unchanged from whichever step failed: creating the service, reading the
// file, executing the call, or marshaling the response.
func SpeechToText(client *http.Client, c AudioConfig) (string, error) {
	// c is received by value, so the defaults applied here stay local and
	// never modify the caller's AudioConfig.
	if c.AudioEncoding == "" {
		c.AudioEncoding = "FLAC"
	}
	if c.AudioSampleRate == 0 {
		c.AudioSampleRate = 16000
	}
	if c.LanguageCode == "" {
		c.LanguageCode = "en-US"
	}

	// The service is created before the file is touched; keeping this order
	// keeps the reported error the same when both steps would fail.
	svc, err := speech.New(client)
	if err != nil {
		return "", err
	}

	raw, err := ioutil.ReadFile(c.FilePath)
	if err != nil {
		return "", err
	}

	// Content is a string field, so the raw audio bytes are sent as
	// standard base64 text.
	req := &speech.SyncRecognizeRequest{
		Config: &speech.RecognitionConfig{
			Encoding:     c.AudioEncoding,
			LanguageCode: c.LanguageCode,
			SampleRate:   c.AudioSampleRate,
		},
		Audio: &speech.RecognitionAudio{
			Content: base64.StdEncoding.EncodeToString(raw),
		},
	}

	result, err := svc.Speech.Syncrecognize(req).Do()
	if err != nil {
		return "", err
	}

	payload, err := result.MarshalJSON()
	if err != nil {
		return "", err
	}
	return string(payload), nil
}