code-dot-org · JillianK · Jun 30, 2020 · May 29, 2020 · May 29, 2020 · Jun 1, 2020
diff --git a/apps/src/lib/util/audioApi.js b/apps/src/lib/util/audioApi.js
@@ -120,11 +120,21 @@ export const commands = {
   playSpeech(opts) {
     apiValidateType(opts, 'playSpeech', 'text', opts.text, 'string');
     apiValidateType(opts, 'playSpeech', 'gender', opts.gender, 'string');
+    apiValidateType(
+      opts,
+      'playSpeech',
+      'language',
+      opts.language,
+      'string',
+      OPTIONAL
+    );
     textToSpeech(
       opts.text,
       opts.gender,
+      opts.language,
       appOptions.azureSpeechServiceToken,
-      appOptions.azureSpeechServiceRegion
+      appOptions.azureSpeechServiceRegion,
+      appOptions.azureSpeechServiceLanguages
     );
   }
 };
@@ -137,6 +147,10 @@ export const executors = {
   playSound: (url, loop = false, callback) =>
     executeCmd(null, 'playSound', {url, loop, callback}),
   stopSound: url => executeCmd(null, 'stopSound', {url}),
-  playSpeech: (text, gender) => executeCmd(null, 'playSpeech', {text, gender})
+  // `language` is looked up as a key of appOptions.azureSpeechServiceLanguages,
+  // which is keyed by native language name (e.g. 'English') rather than by
+  // locale code, so the default has to be a language name.
+  playSpeech: (text, gender, language = 'English') =>
+    executeCmd(null, 'playSpeech', {text, gender, language})
 };
 // Note to self - can we use _.zipObject to map argumentNames to arguments here?
diff --git a/apps/src/lib/util/audioApiDropletConfig.js b/apps/src/lib/util/audioApiDropletConfig.js
@@ -1,9 +1,8 @@
+/* globals dashboard, appOptions */
 import {getStore} from '../../redux';
 import getAssetDropdown from '@cdo/apps/assetManagement/getAssetDropdown';
 import {executors} from './audioApi';
 
-/* global dashboard */
-
 /**
  * Droplet palette configuration entries, ready to drop in to their respective
  * toolkits.
@@ -36,11 +35,12 @@ const dropletConfig = {
   playSpeech: {
     func: 'playSpeech',
     parent: executors,
-    paramButtons: {minArgs: 2, maxArgs: 2},
-    paletteParams: ['text', 'gender'],
-    params: ['"Hello World!"', '"female"'],
+    paramButtons: {minArgs: 2, maxArgs: 3},
+    paletteParams: ['text', 'gender', 'language'],
+    params: ['"Hello World!"', '"female"', '"English"'],
     dropdown: {
-      1: ['"female"', '"male"']
+      1: ['"female"', '"male"'],
+      2: getLanguages.bind(null)
     },
     nativeCallsBackInterpreter: true,
     assetTooltip: {0: chooseAsset.bind(null, 'audio')}
@@ -54,4 +54,17 @@ function chooseAsset(typeFilter, callback) {
   });
 }
 
+/**
+ * Builds the dropdown options for the `language` parameter of playSpeech.
+ * @returns {string[]} The keys of appOptions.azureSpeechServiceLanguages, each
+ *   wrapped in double quotes; an empty array when no language map is present.
+ */
+function getLanguages() {
+  // The language map may be absent (e.g. when the speech service is not
+  // enabled for the level), so fall back to an empty map instead of throwing.
+  return Object.keys(appOptions.azureSpeechServiceLanguages || {}).map(
+    language => `"${language}"`
+  );
+}
+
 export default dropletConfig;
diff --git a/apps/src/lib/util/speech.js b/apps/src/lib/util/speech.js
@@ -9,17 +9,30 @@ import {
  * Start playing given text as speech.
  * @param {string} text The text to play as speech.
  * @param {string} gender The gender of the voice to play.
+ * @param {string} language Key into appLanguages selecting the voice's language (e.g. 'English').
  * @param {string} token The authorization token to access the Azure API
  * @param {string} region The region for accessing the Azure API
+ * @param {object} appLanguages The map of languages to genders and voices that can be used.
  */
-export function textToSpeech(text, gender, token, region) {
+export function textToSpeech(
+  text,
+  gender,
+  language,
+  token,
+  region,
+  appLanguages
+) {
   const speechConfig = SpeechConfig.fromAuthorizationToken(token, region);
   speechConfig.speechSynthesisOutputFormat =
     SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
-
-  const voice = gender === 'male' ? 'en-US-BenjaminRUS' : 'en-US-AriaRUS';
-  const synthesizer = new SpeechSynthesizer(speechConfig, undefined);
-  const ssml = `<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="${voice}">${text}</voice></speak>`;
+  let voice =
+    (appLanguages[language] && appLanguages[language][gender]) ||
+    appLanguages['English']['female'];
+  const synthesizer = new SpeechSynthesizer(
+    speechConfig,
+    undefined /* AudioConfig */
+  );
+  let ssml = `<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="${voice}">${text}</voice></speak>`;
   synthesizer.speakSsmlAsync(
     ssml,
     result => {

diff --git a/apps/test/unit/lib/util/audioApiTest.js b/apps/test/unit/lib/util/audioApiTest.js
@@ -6,6 +6,7 @@ import {
   injectExecuteCmd
 } from '@cdo/apps/lib/util/audioApi';
 import dropletConfig from '@cdo/apps/lib/util/audioApiDropletConfig';
+import {replaceOnWindow, restoreOnWindow} from '../../../util/testUtils';
 
 describe('Audio API', function() {
   // Check that every command, has an executor, has a droplet config entry.
@@ -79,23 +80,36 @@ describe('Audio API', function() {
   });
 
   describe('playSpeech', function() {
-    it('has two arguments, "text" and "gender"', function() {
+    beforeEach(() => {
+      replaceOnWindow('appOptions', {
+        level: {
+          projectTemplateLevelName: 'Test Project'
+        }
+      });
+    });
+
+    afterEach(() => {
+      restoreOnWindow('appOptions');
+    });
+    it('has three arguments, "text", "gender", and "language"', function() {
       const funcName = 'playSpeech';
-      // Check droplet config for the 2 documented params
+      // Check droplet config for the 3 documented params
       expect(dropletConfig[funcName].paletteParams).to.deep.equal([
         'text',
-        'gender'
+        'gender',
+        'language'
       ]);
-      expect(dropletConfig[funcName].params).to.have.length(2);
+      expect(dropletConfig[funcName].params).to.have.length(3);
 
       // Check that executors map arguments to object correctly
       let spy = sinon.spy();
       injectExecuteCmd(spy);
-      executors[funcName]('this is text', 'female', 'nothing');
+      executors[funcName]('this is text', 'female', 'English', 'nothing');
       expect(spy).to.have.been.calledOnce;
       expect(spy.firstCall.args[2]).to.deep.equal({
         text: 'this is text',
-        gender: 'female'
+        gender: 'female',
+        language: 'English'
       });
     });
   });

diff --git a/dashboard/app/controllers/projects_controller.rb b/dashboard/app/controllers/projects_controller.rb
@@ -320,7 +320,8 @@ def show
       has_i18n: @game.has_i18n?,
       game_display_name: data_t("game.name", @game.name),
       azure_speech_service_token: azure_speech_service[:azureSpeechServiceToken],
-      azure_speech_service_region: azure_speech_service[:azureSpeechServiceRegion]
+      azure_speech_service_region: azure_speech_service[:azureSpeechServiceRegion],
+      azure_speech_service_languages: azure_speech_service[:azureSpeechServiceLanguages]
     )
 
     if params[:key] == 'artist'

diff --git a/dashboard/app/helpers/levels_helper.rb b/dashboard/app/helpers/levels_helper.rb
@@ -5,6 +5,7 @@
 require 'firebase_token_generator'
 require 'image_size'
 require 'cdo/firehose'
+require 'cdo/languages'
 require 'net/http'
 require 'uri'
 require 'json'
@@ -461,16 +462,46 @@ def firebase_options
 
   def azure_speech_service_options
     speech_service_options = {}
+
     if @level.game.use_azure_speech_service? && !CDO.azure_speech_service_region.nil? && !CDO.azure_speech_service_key.nil?
-      uri = URI.parse("https://#{CDO.azure_speech_service_region}.api.cognitive.microsoft.com/sts/v1.0/issueToken")
-      header = {'Ocp-Apim-Subscription-Key': CDO.azure_speech_service_key}
-      http = Net::HTTP.new(uri.host, uri.port)
-      http.use_ssl = true
-      http.verify_mode = OpenSSL::SSL::VERIFY_PEER
-      request = Net::HTTP::Post.new(uri.request_uri, header)
-      response = http.request(request)
-      speech_service_options[:azureSpeechServiceToken] = response.body
+      # First, get the token
+      token_uri = URI.parse("https://#{CDO.azure_speech_service_region}.api.cognitive.microsoft.com/sts/v1.0/issueToken")
+      token_header = {'Ocp-Apim-Subscription-Key': CDO.azure_speech_service_key}
+      token_http_request = Net::HTTP.new(token_uri.host, token_uri.port)
+      token_http_request.use_ssl = true
+      token_http_request.verify_mode = OpenSSL::SSL::VERIFY_PEER
+      token_request = Net::HTTP::Post.new(token_uri.request_uri, token_header)
+      token_response = token_http_request.request(token_request)
+      speech_service_options[:azureSpeechServiceToken] = token_response.body
       speech_service_options[:azureSpeechServiceRegion] = CDO.azure_speech_service_region
+
+      # Then, get the list of voices and languages
+      voice_uri = URI.parse("https://#{CDO.azure_speech_service_region}.tts.speech.microsoft.com/cognitiveservices/voices/list")
+      voice_header = {'Authorization': 'Bearer ' + token_response.body}
+      voice_http_request = Net::HTTP.new(voice_uri.host, voice_uri.port)
+      voice_http_request.use_ssl = true
+      voice_http_request.verify_mode = OpenSSL::SSL::VERIFY_PEER
+      voice_request = Net::HTTP::Get.new(voice_uri.request_uri, voice_header)
+      voice_response = voice_http_request.request(voice_request)
+
+      # A failed request has no voice list to parse (JSON.parse would raise
+      # an error that the rescue clause below does not handle), so treat it as
+      # an empty list.
+      all_voices = voice_response.is_a?(Net::HTTPSuccess) ? JSON.parse(voice_response.body) : []
+      # Map native language name => {gender => voice short name}, keeping the first voice seen per gender.
+      language_dictionary = {}
+      all_voices.each do |voice|
+        native_locale_name = Languages.get_native_name_by_locale(voice["Locale"])
+        unless native_locale_name.empty?
+          language_dictionary[native_locale_name[0][:native_name_s]] ||= {}
+          language_dictionary[native_locale_name[0][:native_name_s]][voice["Gender"].downcase] ||= voice["ShortName"]
+        end
+      end
+
+      # Drop languages with fewer than two gender entries.
+      language_dictionary.delete_if {|_, voices| voices.length < 2}
+
+      speech_service_options[:azureSpeechServiceLanguages] = language_dictionary
     end
     speech_service_options
   rescue SocketError, Net::OpenTimeout, Net::ReadTimeout, Errno::ECONNRESET, Errno::ECONNREFUSED, Errno::ENETUNREACH

diff --git a/dashboard/app/helpers/view_options_helper.rb b/dashboard/app/helpers/view_options_helper.rb
@@ -35,7 +35,8 @@ module ViewOptionsHelper
     :answerdash,
     :signed_replay_log_url,
     :azure_speech_service_token,
-    :azure_speech_service_region
+    :azure_speech_service_region,
+    :azure_speech_service_languages
   )
   # Sets custom options to be used by the view layer. The option hash is frozen once read.
   def view_options(opts = nil)

diff --git a/lib/cdo/languages.rb b/lib/cdo/languages.rb
@@ -32,6 +32,14 @@ def self.table
     table.select(:unique_language_s, :locale_s).where("locale_s = '#{locale}'").first[:unique_language_s]
   end
 
+  # Looks up the language rows whose locale_s equals the given locale.
+  # @param locale [String] locale code to match, e.g. "en-US"
+  # @return [Array<Hash>] matching rows with :native_name_s and :locale_s; empty when none match
+  cached def self.get_native_name_by_locale(locale)
+    # Hash condition lets the database layer quote the value rather than splicing it into the SQL text.
+    table.select(:native_name_s, :locale_s).where(locale_s: locale).to_a
+  end
+
   cached def self.get_csf_languages
     table.select(:csf_b, :crowdin_name_s).to_a
   end