working prototype of streaming speech recognition with hotword detection

evancohen · Sep 16, 2016 · 02c32b2 · evancohen · Mar 2, 2017 · 02c32b2
1 parent 2915720
commit 02c32b2
Show file tree

Hide file tree

Showing 8 changed files with 234 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,6 @@ jspm_packages
 
 # Optional REPL history
 .node_repl_history
+
+# VS Code
+.vscode
diff --git a/example.js b/example.js
@@ -0,0 +1,21 @@
+const Sonus = require('./index.js');
+
+const sonus = new Sonus({
+  resource: "resources/common.res",
+  model: "resources/snowboy.umdl",
+  sensitivity: "0.5",
+  audioGain: 2.0
+})
+
+sonus.start();
+
+sonus.on('partial-result', function(result) {
+  console.log("Partial", result);
+})
+
+sonus.on('final-result', function(result) {
+  console.log("Final", result);
+  if(result.transcript.includes("exit")){
+    sonus.stop();
+  }
+})
diff --git a/index.js b/index.js
@@ -0,0 +1,172 @@
+'use strict';
+// Google Speech Dependencies
+const path = require('path');
+const grpc = require('grpc');
+const googleProtoFiles = require('google-proto-files');
+const googleAuth = require('google-auto-auth');
+
+//Sonus dependencies
+const record = require('node-record-lpcm16');
+const stream = require('stream');
+const Snowboy = require('snowboy');
+
+
+// Load gRPC proto files for speech recognition
+function _getSpeechProto() {
+  var PROTO_ROOT_DIR = googleProtoFiles('..');
+  var protoDescriptor = grpc.load({
+    root: PROTO_ROOT_DIR,
+    file: path.relative(PROTO_ROOT_DIR, googleProtoFiles.speech.v1beta1)
+  }, 'proto', {
+      binaryAsBase64: true,
+      convertFieldsToCamelCase: true
+    });
+  return protoDescriptor.google.cloud.speech.v1beta1;
+}
+
+// Authenticate with Google
+function _getGoogleAuthClient(speechProto, resolve, reject) {
+  // Set auth scopes
+  var googleAuthClient = googleAuth({
+    scopes: ['https://www.googleapis.com/auth/cloud-platform']
+  });
+  googleAuthClient.getAuthClient(function (err, authClient) {
+    if (err) {
+      reject(error);
+    }
+    var credentials = grpc.credentials.combineChannelCredentials(
+      grpc.credentials.createSsl(),
+      grpc.credentials.createFromGoogleCredential(authClient)
+    );
+    console.log('Loading speech service...');
+    resolve(new speechProto.Speech('speech.googleapis.com', credentials));
+  });
+}
+
+// Request transformation
+var _toRecognizeRequest = new stream.Transform({ objectMode: true });
+_toRecognizeRequest._transform = function (chunk, encoding, done) {
+  done(null, {
+    audioContent: chunk
+  });
+};
+
+// Streaming: Google Speech Recognition
+class GoogleSpeechRecognition extends stream.Writable {
+  constructor() {
+    super();
+    this.speechServicePromise = new Promise(
+      function (resolve, reject) {
+        _getGoogleAuthClient(_getSpeechProto(), resolve, reject);
+      }
+    );
+  }
+
+  startStreaming(audioStream) {
+    const _this = this;
+    this.speechServicePromise.then(function (speechService) {
+      var call = speechService.streamingRecognize()
+
+      // Listen for various responses
+      call.on('error', function (error) {
+        _this.emit('error', error);
+      });
+
+      call.on('data', function (recognizeResponse) {
+        if (recognizeResponse) {
+          _this.emit('response', recognizeResponse);
+          //after we get a final result, unpipe
+          if(recognizeResponse.results[0] && recognizeResponse.results[0].isFinal){
+            audioStream.unpipe(_toRecognizeRequest)
+          }
+        }
+      });
+      call.on('end', function () {
+        _this.emit('end');
+        _this.end();
+      });
+
+      // Write the initial recognize reqeust
+      call.write({
+        streamingConfig: {
+          config: {
+            encoding: 'LINEAR16',
+            sampleRate: 16000
+          },
+          interimResults: true,
+          singleUtterance: true
+        }
+      });
+
+      // Stream the audio to the Speech API
+      audioStream
+        .pipe(_toRecognizeRequest)
+        .pipe(call);
+    }, function (error) {
+      // An authorization error occured with Google.
+      _this.emit('error', error);
+    });
+  }
+}
+
+
+
+// BEGIN SONUS
+class Sonus extends stream.Writable {
+  constructor(options) {
+    super();
+
+    const _this = this;
+    // Set up Snowboy
+    this._mic = {};
+    this._snowboy = new Snowboy(options);
+    this._gs = new GoogleSpeechRecognition();
+
+    this._snowboy.on('silence', function () {
+      console.log('silence');
+    })
+
+    this._snowboy.on('noise', function () {
+      console.log('noise');
+    })
+
+    // When a hotword is detected pipe the audio stream to speech detection
+    this._snowboy.on('hotword', function (index) {
+      console.log("Keyword spotted:", index);
+      _this._gs.startStreaming(_this._mic);
+    });
+
+    this._gs.on('error', function (error) {
+      this.emit('error', { streamingError: error });
+    });
+
+    this._gs.on('response', function (response) {
+      if (response.results[0]){
+        if(response.results[0].isFinal){
+           _this.emit('final-result', response.results[0].alternatives[0]);
+        } else {
+           _this.emit('partial-result', response.results[0].alternatives[0])
+        }
+      }
+    });
+    this._gs.on('end', function () {
+      //Nothing to do here
+      console.log("TIME TO UNPIPE")
+    });
+  }
+
+  start() {
+    this._mic = record.start({
+      threshold: 0,
+      verbose: false
+    });
+    this._mic.pipe(this._snowboy);
+  }
+
+  stop() {
+    record.stop();
+  }
+
+}
+
+module.exports = Sonus;
diff --git a/package.json b/package.json
@@ -0,0 +1,38 @@
+{
+  "name": "sonus",
+  "version": "0.0.1",
+  "description": "Open source cross platform decentralized always-on speech recognition framework",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/evancohen/sonus.git"
+  },
+  "keywords": [
+    "keyword",
+    "spotting",
+    "hotword",
+    "detection",
+    "STT",
+    "speech",
+    "voice"
+  ],
+  "author": "Evan Cohen",
+  "license": "MIT",
+  "bugs": {
+    "url": "https://github.com/evancohen/sonus/issues"
+  },
+  "homepage": "https://github.com/evancohen/sonus#readme",
+  "dependencies": {
+    "async": "^2.0.1",
+    "google-auto-auth": "^0.2.4",
+    "google-proto-files": "^0.3.0",
+    "googleapis": "^12.0.0",
+    "grpc": "^0.15.0",
+    "node-record-lpcm16": "^0.1.4",
+    "snowboy": "^1.0.7",
+    "stream": "0.0.2"
+  }
+}
diff --git a/resources/common.res b/resources/common.res
diff --git a/resources/ding.wav b/resources/ding.wav
diff --git a/resources/dong.wav b/resources/dong.wav
diff --git a/resources/snowboy.umdl b/resources/snowboy.umdl