/
whisperclient.go
95 lines (77 loc) · 2.36 KB
/
whisperclient.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package whisperclient
import (
"bytes"
"context"
"fmt"
"io"
"mime/multipart"
"net/http"
)
const (
// audioTranscriptionURL is the endpoint that TranscribeAudio posts the
// multipart transcription request to.
audioTranscriptionURL string = "https://api.openai.com/v1/audio/transcriptions"
// FormatSrt is the "srt" response format identifier
// (presumably intended for TranscribeAudioInput.Format).
FormatSrt string = "srt"
// FormatText is the "text" response format identifier
// (presumably intended for TranscribeAudioInput.Format).
FormatText string = "text"
// LanguageEnglish is the "en" language code
// (presumably intended for TranscribeAudioInput.Language).
LanguageEnglish string = "en"
// LanguagePortuguese is the "pt" language code
// (presumably intended for TranscribeAudioInput.Language).
LanguagePortuguese string = "pt"
)
// Client is a thin wrapper around the OpenAI audio transcription (Whisper)
// API. Create one with New.
type Client struct {
	// httpCli sends the transcription requests.
	httpCli *http.Client
	// apiKey is sent as a Bearer token in the Authorization header.
	apiKey string
	// model is sent as the "model" form field of every request.
	model string
}

// New returns a Client that sends requests through httpCli, authenticates
// with apiKey and asks for transcriptions from the given model.
//
// If httpCli is nil, http.DefaultClient is used instead, so the returned
// Client never panics on its first request because of a missing HTTP client.
// http.DefaultClient has no timeout of its own; bound each call with the
// context passed to the request methods.
func New(httpCli *http.Client, apiKey, model string) *Client {
	if httpCli == nil {
		httpCli = http.DefaultClient
	}
	return &Client{
		httpCli: httpCli,
		apiKey:  apiKey,
		model:   model,
	}
}
// TranscribeAudioInput is the input for the TranscribeAudio method.
type TranscribeAudioInput struct {
// Name is the file name given to the "file" part of the multipart form.
Name string
// Language is the value of the "language" form field
// (see the Language* constants).
Language string
// Format is the value of the "response_format" form field
// (see the Format* constants).
Format string
// Data is the reader whose contents are copied into the "file" form part.
// It is read until EOF; TranscribeAudio does not close it.
Data io.Reader
}
// TranscribeAudio uploads the audio read from in.Data to the transcription
// endpoint as a multipart form and returns the raw response body.
//
// The form contains the audio as the "file" part (named in.Name) and the
// client's model as "model". in.Language and in.Format are sent as "language"
// and "response_format" only when they are non-empty, so that leaving them
// unset lets the API apply its own defaults instead of receiving empty values.
//
// The whole payload is buffered in memory before it is sent. in.Data is read
// until EOF and is not closed.
//
// An error is returned when in.Data is nil, when the form cannot be built,
// when the request cannot be created or sent, when the response body cannot
// be read, or when the server answers with a non-2xx status. In the last case
// the error message carries the status line and the response body.
func (c *Client) TranscribeAudio(ctx context.Context, in TranscribeAudioInput) ([]byte, error) {
	// io.Copy would panic on a nil reader; report it as an error instead.
	if in.Data == nil {
		return nil, fmt.Errorf("could not create form file %q: no audio data provided", in.Name)
	}

	var body bytes.Buffer
	writer := multipart.NewWriter(&body)

	part, err := writer.CreateFormFile("file", in.Name)
	if err != nil {
		return nil, fmt.Errorf("could not create form file: %w", err)
	}
	if _, err := io.Copy(part, in.Data); err != nil {
		return nil, fmt.Errorf("could not copy data to form file: %w", err)
	}

	if err := writer.WriteField("model", c.model); err != nil {
		return nil, fmt.Errorf("could not write model field: %w", err)
	}
	// language and response_format are optional: skip them when unset.
	if in.Language != "" {
		if err := writer.WriteField("language", in.Language); err != nil {
			return nil, fmt.Errorf("could not write language field: %w", err)
		}
	}
	if in.Format != "" {
		if err := writer.WriteField("response_format", in.Format); err != nil {
			return nil, fmt.Errorf("could not write response_format field: %w", err)
		}
	}

	// Close writes the trailing boundary; it must happen before the request
	// is sent or the body is an incomplete multipart message.
	if err := writer.Close(); err != nil {
		return nil, fmt.Errorf("could not close writer: %w", err)
	}

	request, err := http.NewRequestWithContext(ctx, http.MethodPost, audioTranscriptionURL, &body)
	if err != nil {
		return nil, fmt.Errorf("could not create request: %w", err)
	}
	request.Header.Set("Authorization", "Bearer "+c.apiKey)
	request.Header.Set("Content-Type", writer.FormDataContentType())

	response, err := c.httpCli.Do(request)
	if err != nil {
		return nil, fmt.Errorf("could not send request: %w", err)
	}
	defer response.Body.Close()

	// Read the body before checking the status so that an error payload can
	// be included in the returned error.
	b, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, fmt.Errorf("could not read response body: %w", err)
	}

	// Client.Do does not treat non-2xx responses as errors, so without this
	// check an API error payload would be returned as a transcription.
	if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("unexpected response status %s: %s", response.Status, bytes.TrimSpace(b))
	}

	return b, nil
}