This repository has been archived by the owner on Oct 1, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 464
/
main.js
244 lines (201 loc) · 8.22 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
const audioUtils = require('./audioUtils'); // for encoding audio data as PCM
const crypto = require('crypto'); // to sign our pre-signed URL
const v4 = require('./aws-signature-v4'); // to generate our pre-signed URL
const marshaller = require("@aws-sdk/eventstream-marshaller"); // for converting binary event stream messages to and from JSON
const util_utf8_node = require("@aws-sdk/util-utf8-node"); // utilities for encoding and decoding UTF8
const mic = require('microphone-stream'); // collect microphone input as a stream of raw bytes
// Converter between binary event stream messages and JSON.
// It is used to marshall outgoing audio events and unmarshall incoming messages.
const eventStreamMarshaller = new marshaller.EventStreamMarshaller(util_utf8_node.toUtf8, util_utf8_node.fromUtf8);
// Module-level state shared by the handlers and helpers below.
let languageCode; // value of the #language dropdown; written by setLanguage(), sent in the pre-signed URL query
let region; // value of the #region dropdown; written by setRegion(), used for the endpoint and the signature
let sampleRate; // target sample rate chosen by setLanguage(); used for downsampling and the URL's sample-rate parameter
let inputSampleRate; // sample rate reported by the microphone stream's "format" event
let transcription = ""; // finalized transcript text accumulated so far; cleared by the reset button
let socket; // WebSocket created in streamAudioToWebSocket()
let micStream; // microphone-stream instance created in streamAudioToWebSocket()
let socketError = false; // set to true by socket.onerror; checked by socket.onclose
let transcribeException = false; // set to true when a non-"event" message arrives; checked by socket.onclose
// Check whether the browser exposes the microphone API. `navigator.mediaDevices`
// itself is undefined in older browsers and in insecure (non-HTTPS) contexts, so it
// has to be tested before `getUserMedia` is read; otherwise this check throws a
// TypeError instead of showing the message below.
if (!window.navigator.mediaDevices || !window.navigator.mediaDevices.getUserMedia) {
    // Use our helper method to show an error on the page
    showError('We support the latest versions of Chrome, Firefox, Safari, and Edge. Update your browser and try your request again.');
    // maintain enabled/disabled state for the start and stop buttons
    toggleStartStop();
}
// Start button: reset per-session state, read the dropdowns, ask the browser for
// microphone access and, once granted, start streaming audio to Amazon Transcribe.
$('#start-button').click(function () {
    $('#error').hide(); // hide any existing errors
    toggleStartStop(true); // disable start and enable stop button

    // Clear the failure flags left over from a previous session. socket.onclose only
    // restores the buttons (and reports close errors) when neither flag is set, so
    // stale values from an earlier failure would suppress that for every later session.
    socketError = false;
    transcribeException = false;

    // set the language and region from the dropdowns
    setLanguage();
    setRegion();

    // first we get the microphone input from the browser (as a promise)...
    window.navigator.mediaDevices.getUserMedia({
        video: false,
        audio: true
    })
        // ...then we convert the mic stream to binary event stream messages when the promise resolves
        .then(streamAudioToWebSocket)
        .catch(function (error) {
            // keep the underlying cause visible for debugging instead of discarding it
            console.error(error);
            showError('There was an error streaming your audio to Amazon Transcribe. Please try again.');
            toggleStartStop();
        });
});
/**
 * Wrap the browser media stream in a microphone-stream instance, open a WebSocket
 * to the pre-signed URL and forward every audio chunk over it once it is open.
 *
 * Writes the module-level `micStream`, `socket` and (via the "format" event)
 * `inputSampleRate`.
 *
 * @param {MediaStream} userMediaStream - stream resolved by getUserMedia()
 */
let streamAudioToWebSocket = function (userMediaStream) {
    // let's get the mic input from the browser, via the microphone-stream module
    micStream = new mic();

    // remember the sample rate of the incoming audio; it is needed for downsampling
    micStream.on("format", function (data) {
        inputSampleRate = data.sampleRate;
    });

    micStream.setStream(userMediaStream);

    // Pre-signed URLs are a way to authenticate a request (or WebSocket connection, in this case)
    // via Query Parameters. Learn more: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html
    let url = createPresignedUrl();

    // open up our WebSocket connection
    socket = new WebSocket(url);
    socket.binaryType = "arraybuffer";

    // when we get audio data from the mic, send it to the WebSocket if possible
    socket.onopen = function () {
        micStream.on('data', function (rawAudioChunk) {
            // the audio stream is raw audio bytes. Transcribe expects PCM with additional metadata, encoded as binary
            let binary = convertAudioToBinaryMessage(rawAudioChunk);

            // convertAudioToBinaryMessage returns undefined for a chunk it cannot convert;
            // passing that to send() would transmit the text "undefined" instead of audio.
            if (binary == null) {
                return;
            }

            if (socket.readyState === socket.OPEN) {
                socket.send(binary);
            }
        });
    };

    // handle messages, errors, and close events
    wireSocketEvents();
};
/**
 * Read the selected option of the #language dropdown into `languageCode` and
 * choose the matching `sampleRate`: 44100 for "en-US" and "es-US", 8000 for
 * every other language code.
 */
function setLanguage() {
    languageCode = $('#language').find(':selected').val();
    // strict comparison: the dropdown value is compared against exact language codes
    const usesHighSampleRate = languageCode === "en-US" || languageCode === "es-US";
    sampleRate = usesHighSampleRate ? 44100 : 8000;
}
/**
 * Copy the value of the currently selected #region option into `region`.
 */
function setRegion() {
    const selectedOption = $('#region').find(':selected');
    region = selectedOption.val();
}
/**
 * Attach the message, error and close handlers to the module-level `socket`.
 *
 * - onmessage: unmarshalls the binary event stream message, parses its body as
 *   JSON and either forwards it to handleEventStreamMessage() or, for any
 *   message type other than "event", reports it as an error.
 * - onerror: reports a connection error.
 * - onclose: stops the microphone and, unless an error was already reported,
 *   restores the buttons (showing the close reason for non-normal closures).
 */
function wireSocketEvents() {
    // handle inbound messages from Amazon Transcribe
    socket.onmessage = function (message) {
        // convert the binary event stream message to JSON
        // (Buffer.from replaces the deprecated Buffer() call without `new`)
        let messageWrapper = eventStreamMarshaller.unmarshall(Buffer.from(message.data));
        let messageBody = JSON.parse(String.fromCharCode.apply(String, messageWrapper.body));

        if (messageWrapper.headers[":message-type"].value === "event") {
            handleEventStreamMessage(messageBody);
        } else {
            transcribeException = true;
            // NOTE(review): showError inserts its argument as HTML; this message text comes from the service
            showError(messageBody.Message);
            toggleStartStop();
        }
    };

    socket.onerror = function () {
        socketError = true;
        showError('WebSocket connection error. Try again.');
        toggleStartStop();
    };

    socket.onclose = function (closeEvent) {
        micStream.stop();

        // the close event immediately follows the error event; only handle one.
        if (socketError || transcribeException) {
            return;
        }

        // 1000 is the WebSocket "normal closure" status code; anything else is reported
        if (closeEvent.code !== 1000) {
            showError('</i><strong>Streaming Exception</strong><br>' + closeEvent.reason);
        }
        toggleStartStop();
    };
}
/**
 * Render the first alternative of the first result in a transcript event.
 *
 * The #transcript textarea always shows the accumulated `transcription`
 * followed by the newest segment. When the segment is not partial, the
 * textarea is scrolled to the bottom and the segment is appended to
 * `transcription`.
 *
 * @param {object} messageJson - parsed message body; reads Transcript.Results
 */
let handleEventStreamMessage = function (messageJson) {
    const segments = messageJson.Transcript.Results;
    if (!(segments.length > 0)) {
        return;
    }

    const newest = segments[0];
    if (!(newest.Alternatives.length > 0)) {
        return;
    }

    // fix encoding for accented characters
    const text = decodeURIComponent(escape(newest.Alternatives[0].Transcript));
    const line = text + "\n";
    const textarea = $('#transcript');

    // show everything finalized so far plus the latest segment
    textarea.val(transcription + line);

    // partial segments are displayed but not kept
    if (newest.IsPartial) {
        return;
    }

    // final segment: scroll the textarea down and add it to the overall transcription
    textarea.scrollTop(textarea[0].scrollHeight);
    transcription += line;
};
/**
 * Stop the microphone and tell Transcribe that the audio has ended.
 *
 * Does nothing when no socket has been created yet or the socket is not open.
 */
let closeSocket = function () {
    // `socket` stays undefined until the first session starts
    if (!socket || socket.readyState !== socket.OPEN) {
        return;
    }

    micStream.stop();

    // Send an empty frame so that Transcribe initiates a closure of the WebSocket after submitting all transcripts
    // (Buffer.alloc(0) replaces the deprecated `new Buffer([])` constructor)
    let emptyMessage = getAudioEventMessage(Buffer.alloc(0));
    let emptyBuffer = eventStreamMarshaller.marshall(emptyMessage);
    socket.send(emptyBuffer);
};
// Stop button: end the audio stream, then put the buttons back into their idle state.
$('#stop-button').click(() => {
    closeSocket();
    toggleStartStop();
});
// Reset button: forget the accumulated transcript and empty the textarea.
$('#reset-button').click(() => {
    transcription = '';
    $('#transcript').val('');
});
/**
 * Keep the start and stop buttons in opposite enabled/disabled states.
 *
 * @param {boolean} [disableStart=false] - true disables start and enables stop;
 *   false (the default) enables start and disables stop
 */
function toggleStartStop(disableStart = false) {
    // `disabled` is a DOM property, so both buttons are updated with .prop()
    // rather than mixing .prop() and .attr()
    $('#start-button').prop('disabled', disableStart);
    $('#stop-button').prop('disabled', !disableStart);
}
/**
 * Put an error icon followed by `message` into the #error element and show it.
 *
 * @param {string} message - text or markup to display; it is inserted as HTML
 */
function showError(message) {
    const banner = $('#error');
    const markup = '<i class="fa fa-times-circle"></i> ' + message;
    banner.html(markup);
    banner.show();
}
/**
 * Turn one microphone chunk into a binary event stream message.
 *
 * @param {*} audioChunk - chunk emitted by the microphone stream
 * @returns {*} the marshalled message, or undefined when mic.toRaw() yields
 *   null/undefined for the chunk
 */
function convertAudioToBinaryMessage(audioChunk) {
    const samples = mic.toRaw(audioChunk);
    if (samples == null) {
        return;
    }

    // downsample from the microphone rate to the target rate, then PCM-encode
    const resampled = audioUtils.downsampleBuffer(samples, inputSampleRate, sampleRate);
    const pcmBytes = Buffer.from(audioUtils.pcmEncode(resampled));

    // wrap the bytes in the audio event envelope and serialize it to a binary event stream message
    const envelope = getAudioEventMessage(pcmBytes);
    return eventStreamMarshaller.marshall(envelope);
}
/**
 * Wrap audio bytes in the envelope used for an AudioEvent message.
 *
 * @param {*} buffer - payload stored unchanged as the message body
 * @returns {{headers: object, body: *}} message with ":message-type" = "event"
 *   and ":event-type" = "AudioEvent" string headers
 */
function getAudioEventMessage(buffer) {
    // every header in this envelope is a string-typed value
    const stringHeader = (value) => ({ type: 'string', value: value });

    const headers = {
        ':message-type': stringHeader('event'),
        ':event-type': stringHeader('AudioEvent')
    };

    return { headers: headers, body: buffer };
}
/**
 * Build the pre-signed WebSocket URL for the streaming transcription endpoint.
 *
 * Reads `region`, `languageCode` and `sampleRate` from module state and the
 * credentials from the #access_id, #secret_key and #session_token inputs.
 *
 * @returns {*} whatever v4.createPresignedURL returns for these arguments
 */
function createPresignedUrl() {
    const host = "transcribestreaming." + region + ".amazonaws.com:8443";

    // SHA-256 hex digest of an empty string, passed as the payload hash
    const emptyPayloadHash = crypto.createHash('sha256').update('', 'utf8').digest('hex');

    const signingOptions = {
        key: $('#access_id').val(),
        secret: $('#secret_key').val(),
        sessionToken: $('#session_token').val(),
        protocol: 'wss',
        expires: 15,
        region: region,
        query: "language-code=" + languageCode + "&media-encoding=pcm&sample-rate=" + sampleRate
    };

    // get a preauthenticated URL that we can use to establish our WebSocket
    return v4.createPresignedURL(
        'GET',
        host,
        '/stream-transcription-websocket',
        'transcribe',
        emptyPayloadHash,
        signingOptions
    );
}