/
api_op_SynthesizeSpeech.go
296 lines (246 loc) · 9.9 KB
/
api_op_SynthesizeSpeech.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
// Code generated by private/model/cli/gen-api/main.go. DO NOT EDIT.
package polly
import (
"context"
"io"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/internal/awsutil"
"github.com/aws/aws-sdk-go-v2/private/protocol"
)
// SynthesizeSpeechInput contains the parameters for the SynthesizeSpeech operation.
type SynthesizeSpeechInput struct {
	_ struct{} `type:"structure"`

	// Specifies the engine (standard or neural) for Amazon Polly to use when processing
	// input text for speech synthesis. Using a voice that is not supported for
	// the engine selected will result in an error.
	Engine Engine `type:"string" enum:"true"`

	// Optional language code for the Synthesize Speech request. This is only necessary
	// if using a bilingual voice, such as Aditi, which can be used for either Indian
	// English (en-IN) or Hindi (hi-IN).
	//
	// If a bilingual voice is used and no language code is specified, Amazon Polly
	// will use the default language of the bilingual voice. The default language
	// for any voice is the one returned by the DescribeVoices (https://docs.aws.amazon.com/polly/latest/dg/API_DescribeVoices.html)
	// operation for the LanguageCode parameter. For example, if no language code
	// is specified, Aditi will use Indian English rather than Hindi.
	LanguageCode LanguageCode `type:"string" enum:"true"`

	// List of one or more pronunciation lexicon names you want the service to apply
	// during synthesis. Lexicons are applied only if the language of the lexicon
	// is the same as the language of the voice. For information about storing lexicons,
	// see PutLexicon (https://docs.aws.amazon.com/polly/latest/dg/API_PutLexicon.html).
	LexiconNames []string `type:"list"`

	// The format in which the returned output will be encoded. For audio stream,
	// this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.
	//
	// When pcm is used, the content returned is audio/pcm in a signed 16-bit, 1
	// channel (mono), little-endian format.
	//
	// OutputFormat is a required field
	OutputFormat OutputFormat `type:"string" required:"true" enum:"true"`

	// The audio frequency specified in Hz.
	//
	// The valid values for mp3 and ogg_vorbis are "8000", "16000", "22050", and
	// "24000". The default value for standard voices is "22050". The default value
	// for neural voices is "24000".
	//
	// Valid values for pcm are "8000" and "16000" The default value is "16000".
	SampleRate *string `type:"string"`

	// The type of speech marks returned for the input text.
	SpeechMarkTypes []SpeechMarkType `type:"list"`

	// Input text to synthesize. If you specify ssml as the TextType, follow the
	// SSML format for the input text.
	//
	// Text is a required field
	Text *string `type:"string" required:"true"`

	// Specifies whether the input text is plain text or SSML. The default value
	// is plain text. For more information, see Using SSML (https://docs.aws.amazon.com/polly/latest/dg/ssml.html).
	TextType TextType `type:"string" enum:"true"`

	// Voice ID to use for the synthesis. You can get a list of available voice
	// IDs by calling the DescribeVoices (https://docs.aws.amazon.com/polly/latest/dg/API_DescribeVoices.html)
	// operation.
	//
	// VoiceId is a required field
	VoiceId VoiceId `type:"string" required:"true" enum:"true"`
}
// String returns the string representation of the input.
func (s SynthesizeSpeechInput) String() string {
	pretty := awsutil.Prettify(s)
	return pretty
}
// Validate inspects the fields of the type to determine if they are valid.
// It reports a missing-parameter error for each required field that is unset
// (OutputFormat, Text, and VoiceId), and returns nil when all are present.
func (s *SynthesizeSpeechInput) Validate() error {
	params := aws.ErrInvalidParams{Context: "SynthesizeSpeechInput"}

	if len(s.OutputFormat) == 0 {
		params.Add(aws.NewErrParamRequired("OutputFormat"))
	}
	if s.Text == nil {
		params.Add(aws.NewErrParamRequired("Text"))
	}
	if len(s.VoiceId) == 0 {
		params.Add(aws.NewErrParamRequired("VoiceId"))
	}

	if params.Len() == 0 {
		return nil
	}
	return params
}
// MarshalFields encodes the AWS API shape using the passed in protocol encoder.
// Only fields that are set are written to the request body; the Content-Type
// header is always emitted.
func (s SynthesizeSpeechInput) MarshalFields(e protocol.FieldEncoder) error {
	e.SetValue(protocol.HeaderTarget, "Content-Type", protocol.StringValue("application/json"), protocol.Metadata{})

	if len(s.Engine) > 0 {
		e.SetValue(protocol.BodyTarget, "Engine", protocol.QuotedValue{ValueMarshaler: s.Engine}, protocol.Metadata{})
	}
	if len(s.LanguageCode) > 0 {
		e.SetValue(protocol.BodyTarget, "LanguageCode", protocol.QuotedValue{ValueMarshaler: s.LanguageCode}, protocol.Metadata{})
	}
	if s.LexiconNames != nil {
		names := e.List(protocol.BodyTarget, "LexiconNames", protocol.Metadata{})
		names.Start()
		for _, name := range s.LexiconNames {
			names.ListAddValue(protocol.QuotedValue{ValueMarshaler: protocol.StringValue(name)})
		}
		names.End()
	}
	if len(s.OutputFormat) > 0 {
		e.SetValue(protocol.BodyTarget, "OutputFormat", protocol.QuotedValue{ValueMarshaler: s.OutputFormat}, protocol.Metadata{})
	}
	if s.SampleRate != nil {
		e.SetValue(protocol.BodyTarget, "SampleRate", protocol.QuotedValue{ValueMarshaler: protocol.StringValue(*s.SampleRate)}, protocol.Metadata{})
	}
	if s.SpeechMarkTypes != nil {
		marks := e.List(protocol.BodyTarget, "SpeechMarkTypes", protocol.Metadata{})
		marks.Start()
		for _, mark := range s.SpeechMarkTypes {
			marks.ListAddValue(protocol.QuotedValue{ValueMarshaler: protocol.StringValue(mark)})
		}
		marks.End()
	}
	if s.Text != nil {
		e.SetValue(protocol.BodyTarget, "Text", protocol.QuotedValue{ValueMarshaler: protocol.StringValue(*s.Text)}, protocol.Metadata{})
	}
	if len(s.TextType) > 0 {
		e.SetValue(protocol.BodyTarget, "TextType", protocol.QuotedValue{ValueMarshaler: s.TextType}, protocol.Metadata{})
	}
	if len(s.VoiceId) > 0 {
		e.SetValue(protocol.BodyTarget, "VoiceId", protocol.QuotedValue{ValueMarshaler: s.VoiceId}, protocol.Metadata{})
	}
	return nil
}
// SynthesizeSpeechOutput is the result of the SynthesizeSpeech operation.
// The synthesized audio is delivered as the streaming payload (AudioStream).
type SynthesizeSpeechOutput struct {
	_ struct{} `type:"structure" payload:"AudioStream"`

	// Stream containing the synthesized speech. The caller is responsible for
	// closing it.
	AudioStream io.ReadCloser `type:"blob"`

	// Specifies the type audio stream. This should reflect the OutputFormat parameter
	// in your request.
	//
	// * If you request mp3 as the OutputFormat, the ContentType returned is
	// audio/mpeg.
	//
	// * If you request ogg_vorbis as the OutputFormat, the ContentType returned
	// is audio/ogg.
	//
	// * If you request pcm as the OutputFormat, the ContentType returned is
	// audio/pcm in a signed 16-bit, 1 channel (mono), little-endian format.
	//
	// * If you request json as the OutputFormat, the ContentType returned is
	// audio/json.
	ContentType *string `location:"header" locationName:"Content-Type" type:"string"`

	// Number of characters synthesized.
	RequestCharacters *int64 `location:"header" locationName:"x-amzn-RequestCharacters" type:"integer"`
}
// String returns the string representation of the output.
func (s SynthesizeSpeechOutput) String() string {
	pretty := awsutil.Prettify(s)
	return pretty
}
// MarshalFields encodes the AWS API shape using the passed in protocol encoder.
// Only the header-bound fields are written; the AudioStream payload is handled
// by the streaming machinery, not by this method.
func (s SynthesizeSpeechOutput) MarshalFields(e protocol.FieldEncoder) error {
	if s.ContentType != nil {
		e.SetValue(protocol.HeaderTarget, "Content-Type", protocol.QuotedValue{ValueMarshaler: protocol.StringValue(*s.ContentType)}, protocol.Metadata{})
	}
	if s.RequestCharacters != nil {
		e.SetValue(protocol.HeaderTarget, "x-amzn-RequestCharacters", protocol.Int64Value(*s.RequestCharacters), protocol.Metadata{})
	}
	// Skipping AudioStream Output type's body not valid.
	return nil
}
// opSynthesizeSpeech is the service operation name for SynthesizeSpeech.
const opSynthesizeSpeech = "SynthesizeSpeech"
// SynthesizeSpeechRequest returns a request value for making API operation for
// Amazon Polly.
//
// Synthesizes UTF-8 input, plain text or SSML, to a stream of bytes. SSML input
// must be valid, well-formed SSML. Some alphabets might not be available with
// all the voices (for example, Cyrillic might not be read at all by English
// voices) unless phoneme mapping is used. For more information, see How it
// Works (https://docs.aws.amazon.com/polly/latest/dg/how-text-to-speech-works.html).
//
//    // Example sending a request using SynthesizeSpeechRequest.
//    req := client.SynthesizeSpeechRequest(params)
//    resp, err := req.Send(context.TODO())
//    if err == nil {
//        fmt.Println(resp)
//    }
//
// Please also see https://docs.aws.amazon.com/goto/WebAPI/polly-2016-06-10/SynthesizeSpeech
func (c *Client) SynthesizeSpeechRequest(input *SynthesizeSpeechInput) SynthesizeSpeechRequest {
	// A nil input is replaced by an empty one so the request can always be built.
	if input == nil {
		input = &SynthesizeSpeechInput{}
	}

	op := &aws.Operation{
		Name:       opSynthesizeSpeech,
		HTTPMethod: "POST",
		HTTPPath:   "/v1/speech",
	}

	return SynthesizeSpeechRequest{
		Request: c.newRequest(op, input, &SynthesizeSpeechOutput{}),
		Input:   input,
		Copy:    c.SynthesizeSpeechRequest,
	}
}
// SynthesizeSpeechRequest is the request type for the
// SynthesizeSpeech API operation.
type SynthesizeSpeechRequest struct {
	*aws.Request

	// Input holds the operation's input parameters.
	Input *SynthesizeSpeechInput

	// Copy builds a new request value from the given input using the same client.
	Copy func(*SynthesizeSpeechInput) SynthesizeSpeechRequest
}
// Send marshals and sends the SynthesizeSpeech API request, returning the
// typed response or the error from the underlying request.
func (r SynthesizeSpeechRequest) Send(ctx context.Context) (*SynthesizeSpeechResponse, error) {
	r.Request.SetContext(ctx)

	if err := r.Request.Send(); err != nil {
		return nil, err
	}

	return &SynthesizeSpeechResponse{
		SynthesizeSpeechOutput: r.Request.Data.(*SynthesizeSpeechOutput),
		response:               &aws.Response{Request: r.Request},
	}, nil
}
// SynthesizeSpeechResponse is the response type for the
// SynthesizeSpeech API operation.
type SynthesizeSpeechResponse struct {
	*SynthesizeSpeechOutput

	// response carries the metadata of the completed request.
	response *aws.Response
}
// SDKResponseMetdata returns the response metadata for the
// SynthesizeSpeech request.
//
// NOTE(review): the misspelling "Metdata" is part of the generated, exported
// API surface — renaming it would break callers, so it is preserved as-is.
func (r *SynthesizeSpeechResponse) SDKResponseMetdata() *aws.Response {
	return r.response
}