Various fixes to tts-adapter-sapinative

see daisy/pipeline#667
daisy · Apr 10, 2023 · 9f9547c · 9f9547c
2 parents fde6d5d + e0240a4
commit 9f9547c
Show file tree

Hide file tree

Showing 18 changed files with 302 additions and 242 deletions.
diff --git a/bom/pom.xml b/bom/pom.xml
@@ -468,7 +468,7 @@
       <dependency>
         <groupId>org.daisy.pipeline.modules</groupId>
         <artifactId>tts-adapter-sapinative</artifactId>
-        <version>3.1.0</version>
+        <version>3.1.1-SNAPSHOT</version>
       </dependency>
       <dependency>
         <groupId>org.daisy.pipeline.modules</groupId>

diff --git a/...adapters/tts-adapter-sapinative/src/main/java/org/daisy/pipeline/tts/onecore/Onecore.java b/...adapters/tts-adapter-sapinative/src/main/java/org/daisy/pipeline/tts/onecore/Onecore.java
@@ -1,12 +1,14 @@
 package org.daisy.pipeline.tts.onecore;
 
+import java.io.IOException;
+
 public class Onecore {
 
 	public static native long openConnection();
 
 	public static native int closeConnection(long connection);
 
-	public static native int speak(long connection, String voiceVendor, String voiceName, String text);
+	public static native int speak(long connection, String voiceVendor, String voiceName, String text) throws IOException;
 	/* in bytes*/
 	public static native int getStreamSize(long connection);
 

diff --git a/...ts-adapters/tts-adapter-sapinative/src/main/java/org/daisy/pipeline/tts/onecore/SAPI.java b/...ts-adapters/tts-adapter-sapinative/src/main/java/org/daisy/pipeline/tts/onecore/SAPI.java
@@ -1,12 +1,14 @@
 package org.daisy.pipeline.tts.onecore;
 
+import java.io.IOException;
+
 public class SAPI {
 
-	public static native long openConnection();
+	public static native long openConnection() throws IOException;
 
 	public static native int closeConnection(long connection);
 
-	public static native int speak(long connection, String voiceVendor, String voiceName, String text);
+	public static native int speak(long connection, String voiceVendor, String voiceName, String text) throws IOException;
 	/* in bytes*/
 	public static native int getStreamSize(long connection);
 

diff --git a/...ers/tts-adapter-sapinative/src/main/java/org/daisy/pipeline/tts/sapi/impl/SAPIEngine.java b/...ers/tts-adapter-sapinative/src/main/java/org/daisy/pipeline/tts/sapi/impl/SAPIEngine.java
@@ -79,8 +79,8 @@ public SynthesisResult synthesize(XdmNode ssml, Voice voice, TTSResource resourc
 		}
 		try {
 			List<Integer> marks = new ArrayList<>();
-			AudioInputStream audio = speak(transformSsmlNodeToString(ssml, ssmlTransformer, xsltParams),
-			                               voice, resource, marks);
+			String ssmlForEngine = transformSsmlNodeToString(ssml, ssmlTransformer, xsltParams);
+			AudioInputStream audio = speak(ssmlForEngine, voice, resource, marks);
 			return new SynthesisResult(audio, marks);
 		} catch (IOException|SaxonApiException e) {
 			throw new SynthesisException(e);
@@ -93,33 +93,54 @@ public AudioInputStream speak(String ssml, Voice voice, TTSResource resource, Li
 		voice = mVoiceFormatConverter.get(voice.name.toLowerCase());
 		ThreadResource tr = (ThreadResource)resource;
 		if (voice.engine.equals("sapi") ){
-			int res = SAPI.speak(tr.SAPIConnection, voice.engine, voice.name, ssml);
-			if (res != SAPIResult.SAPINATIVE_OK.value()) {
-				throw new SynthesisException("SAPI-legacy speak error " + res + " raised with voice "
-				                             + voice + ": " +  SAPIResult.valueOfCode(res)+"\nFor text :"
-				                             + ssml);
+			try {
+				int res = SAPI.speak(tr.SAPIConnection, voice.engine, voice.name, ssml);
+				if (res != SAPIResult.SAPINATIVE_OK.value()) {
+					throw new SynthesisException("SAPI-legacy speak error " + res + " raised with voice "
+							+ voice + ": " +  SAPIResult.valueOfCode(res)+"\nFor text :"
+							+ ssml);
+				}
+			} catch (RuntimeException e){
+				Logger.error("SAPI-legacy raised a RUNTIME exception while speaking " + ssml + " with " + voice + " : " + e.getMessage());
+				throw new SynthesisException("SAPI-legacy raised a RUNTIME exception while speaking " + ssml + " with " + voice, e);
+			} catch (Exception e){
+				Logger.error("SAPI-legacy raised an exception while speaking " + ssml + " with " + voice + " : " + e.getMessage());
+				throw new SynthesisException("SAPI-legacy raised an exception while speaking " + ssml + " with " + voice, e);
 			}
+
 			int size = SAPI.getStreamSize(tr.SAPIConnection);
 			byte[] data = new byte[size];
 			SAPI.readStream(tr.SAPIConnection, data, 0);
-			long[] bookmarksPositions = SAPI.getBookmarkPositions(tr.SAPIConnection);
+
+			String[] names = SAPI.getBookmarkNames(tr.SAPIConnection);
+			long[] positions = SAPI.getBookmarkPositions(tr.SAPIConnection);
 			float sampleRate = sapiAudioFormat.getSampleRate();
 			int bytesPerSample = sapiAudioFormat.getSampleSizeInBits() / 8;
-			for (long position : bookmarksPositions) {
-				int offset = (int) ((position * sampleRate * bytesPerSample) / 1000);
-				marks.add(offset);
+			for (int i = 0; i < names.length; ++i) {
+				int offset = (int) ((positions[i] * sampleRate * bytesPerSample) / 1000);
+				// it happens that SAPI / OneCore sometimes make empty bookmarks (for unknown reason)
+				if (names[i].length() > 0){
+					marks.add(offset);
+				}
 			}
 			return createAudioStream(sapiAudioFormat, data);
 		} else { // use onecore engine
-			int res = Onecore.speak(tr.onecoreConnection, voice.engine, voice.name, ssml);
-			if (res != OnecoreResult.SAPINATIVE_OK.value()) {
-				throw new SynthesisException("SAPI-Onecore speak error " + res + " raised with voice "
-				                             + voice + ": " +  OnecoreResult.valueOfCode(res)+"\nFor text :"
-				                             + ssml);
+			try {
+				int res = Onecore.speak(tr.onecoreConnection, voice.engine, voice.name, ssml);
+				if (res != OnecoreResult.SAPINATIVE_OK.value()) {
+					throw new SynthesisException("SAPI-Onecore speak error " + res + " raised with voice "
+							+ voice + ": " +  OnecoreResult.valueOfCode(res)+"\nFor text :"
+							+ ssml);
+				}
+			} catch (IOException e) {
+				Logger.error("SAPI-onecore raised an exception while speaking " + ssml + " with " + voice + " : " + e.getMessage());
+				throw new SynthesisException("SAPI-Onecore raised an exception while speaking " + ssml + " with " + voice, e);
 			}
+
 			int size = Onecore.getStreamSize(tr.onecoreConnection);
 			byte[] data = new byte[size];
 			Onecore.readStream(tr.onecoreConnection, data, 0);
+			String[] names = Onecore.getBookmarkNames(tr.onecoreConnection);
 			long[] pos = Onecore.getBookmarkPositions(tr.onecoreConnection);
 			AudioInputStream result;
 			try {
@@ -130,9 +151,12 @@ public AudioInputStream speak(String ssml, Voice voice, TTSResource resource, Li
 			AudioFormat resultFormat = result.getFormat();
 			float sampleRate = resultFormat.getSampleRate();
 			int bytesPerSample = resultFormat.getSampleSizeInBits() / 8;
-			for (long po : pos) {
-				int offset = (int) ((po * sampleRate * bytesPerSample) / 1000);
-				marks.add(offset);
+			for (int i = 0; i < names.length; ++i) {
+				int offset = (int) ((pos[i] * sampleRate * bytesPerSample) / 1000);
+				// it happens that SAPI / OneCore sometimes make empty bookmarks (for unknown reason)
+				if (names[i].length() > 0){
+					marks.add(offset);
+				}
 			}
 			return result;
 		}
@@ -149,11 +173,17 @@ public TTSResource allocateThreadResources() throws SynthesisException {
 			tr.onecoreConnection = connection;
 		}
 		if (this.sapiAudioFormat != null){
-			long connection = SAPI.openConnection();
-			if (connection == 0) {
-				throw new SynthesisException("could not open SAPI-Onecore context.");
+			try {
+				long connection = SAPI.openConnection();
+				if (connection == 0) {
+					throw new IOException("could not connect to SAPI-Legacy context.");
+				}
+				tr.SAPIConnection = connection;
+			} catch (IOException e) {
+				throw new SynthesisException("could not open SAPI-Legacy context.", e);
 			}
-			tr.SAPIConnection = connection;
+
+
 		}
 		return tr;
 	}
@@ -224,8 +254,14 @@ public Collection<Voice> getAvailableVoices() {
 				// Note that since onecore voice are added after sapi,
 				// they are overwriting matching sapi voices to avoid duplicates
 				try {
+					// remove the "desktop" extension of SAPI legacy microsoft voices
+					// So that onecore voices are used instead if available
+					String key = names.get(i).toLowerCase();
+					if (key.endsWith(" desktop")) {
+						key = key.substring(0,key.length() - " desktop".length());
+					}
 					mVoiceFormatConverter.put(
-						names.get(i).toLowerCase(),
+						key,
 						new Voice(
 							vendors.get(i),
 							names.get(i),

diff --git a/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/jni_helper.h b/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/jni_helper.h
@@ -47,4 +47,6 @@ jobjectArray newJavaArray(JNIEnv* env, Iterator items, size_t size, const char*
 	return jArray;
 }
 
+void raiseIOException(JNIEnv* env, const jchar* message, size_t len);
+
 #endif
diff --git a/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/onecorenative/jni_helper.cpp b/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/onecorenative/jni_helper.cpp
@@ -20,4 +20,12 @@ jobjectArray emptyJavaArray(JNIEnv* env, const char* javaClass, int size) {
 	jclass objClass = env->FindClass(javaClass);
 	jobjectArray jArray = env->NewObjectArray(size, objClass, 0);
 	return jArray;
-}
+}
+
+// Raises a java.io.IOException in the calling Java thread.
+//   env     - JNI environment of the current native call
+//   message - exception message as UTF-16 code units (does not need to be NUL-terminated)
+//   len     - number of UTF-16 code units in `message`
+// The exception is only pending when this function returns: the native caller
+// must still return to Java for it to be delivered.
+void raiseIOException(JNIEnv* env, const jchar* message, size_t len ) {
+	jclass exceptionClass = env->FindClass("java/io/IOException");
+	if (exceptionClass == NULL) {
+		// FindClass already left its own error pending; no further JNI call is safe here
+		return;
+	}
+	jmethodID constructor = env->GetMethodID(exceptionClass, "<init>", "(Ljava/lang/String;)V");
+	if (constructor == NULL) {
+		// NoSuchMethodError is pending
+		return;
+	}
+	jstring messageJava = env->NewString(message, static_cast<jsize>(len));
+	if (messageJava == NULL) {
+		// OutOfMemoryError is pending
+		return;
+	}
+	jobject except = env->NewObject(exceptionClass, constructor, messageJava);
+	if (except != NULL) {
+		env->Throw(static_cast<jthrowable>(except));
+	}
+}
diff --git a/...-adapter-sapinative/src/main/jni/onecorenative/org_daisy_pipeline_tts_onecore_Onecore.cpp b/...-adapter-sapinative/src/main/jni/onecorenative/org_daisy_pipeline_tts_onecore_Onecore.cpp
@@ -36,9 +36,6 @@ ConnectionsRegistry* openedConnection = NULL;
 ///////////////////////////////////////
 
 JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_initialize(JNIEnv* env, jclass) {
-#if _DEBUG
-    std::wcout << "Initializing Onecore" << std::endl;
-#endif
     gAllVoices = new OneCoreVoice::Map();
     winrtConnection temp = winrtConnection();
     for each (auto rawVoice in temp.voices())
@@ -59,7 +56,7 @@ JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_initialize(JN
             voice
         ));
     }
-    openedConnection = new ConnectionsRegistry();
+    openedConnection = new ConnectionsRegistry(1024);
     return SAPI_OK;
 }
 
@@ -72,9 +69,6 @@ JNIEXPORT jlong JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_openConnecti
 }
 
 JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_closeConnection(JNIEnv*, jclass, jlong connection) {
-#if _DEBUG
-    std::wcout << "Closing onecore connection " << connection << std::endl;
-#endif
     Connection* conn = reinterpret_cast<Connection*>(connection);
     if (conn != NULL) {
         delete conn;
@@ -91,16 +85,10 @@ JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_closeConnecti
 
 
 JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_dispose(JNIEnv*, jclass) {
-#if _DEBUG
-    std::wcout << "Disposing of Onecore" << std::endl;
-#endif
     // Close remaining connections
     if (openedConnection != NULL) {
 
         for (ConnectionsRegistry::iterator it = openedConnection->begin(); it != openedConnection->end(); ++it) {
-#if _DEBUG
-            std::wcout << "- Cleaning onecore connection " << *it << std::endl;
-#endif
             Connection* conn = reinterpret_cast<Connection*>(*it);
             delete conn;
         }
@@ -135,25 +123,29 @@ JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_speak(JNIEnv*
     if (!(convertToUTF16(env, text, conn->sentence, MAX_SENTENCE_SIZE)))
         return TOO_LONG_TEXT;
 
-#if _DEBUG
-        std::wcout << it->second.name << " speaking " << conn->sentence << std::endl;
-#endif
-        // VoiceInformation seems to create an exception, so we use the voice display name for now
-        winrt::hstring ssmltext = winrt::hstring(conn->sentence);
-        winrt::hstring foundVoiceName = it->second.rawVoice;
+    // VoiceInformation seems to create an exception, so we use the voice display name for now
+    winrt::hstring ssmltext = winrt::hstring(conn->sentence);
+    winrt::hstring foundVoiceName = it->second.rawVoice;
 
-        try {   
-            conn->streamData = conn->onecore.speak(ssmltext, foundVoiceName);
-            conn->marksNames = conn->onecore.marksNames();
-            conn->marksPositions = conn->onecore.marksPositions();
-        }
-        catch (winrt::hresult_error const& ex)
-        {
-            winrt::hresult hr = ex.code();
-            winrt::hstring message = ex.message(); 
-            std::wcout << "Exception raised while speaking " << conn->sentence << std::endl << "With voice " << it->second.name << " : " << std::endl;
-            std::cout << message.c_str() << std::endl;
-        }
+    try {   
+        conn->streamData = conn->onecore.speak(ssmltext, foundVoiceName);
+        conn->marksNames = conn->onecore.marksNames();
+        conn->marksPositions = conn->onecore.marksPositions();
+    }
+    catch (winrt::hresult_error const& ex)
+    {
+
+        winrt::hresult hr = ex.code();
+        std::wstring message = std::wstring(ex.message().c_str());
+        std::wstring sentence = std::wstring(conn->sentence);
+        std::wostringstream excep;
+        excep << L"Error code (0x" << std::hex << hr.value << L") raised when trying to speak with OneCore SAPI" << std::endl;
+        excep << message << std::endl;
+        // Use exception instead of return result to get error code in java
+        raiseIOException(env, (const jchar*)excep.str().c_str(), excep.str().size());
+        return COULD_NOT_SPEAK;
+
+    }
 
     return SAPI_OK;
 }
@@ -191,9 +183,6 @@ struct VoiceVendorToJString {
     }
 };
 JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceVendors(JNIEnv* env, jclass) {
-#if _DEBUG
-    std::wcout << "Getting voice vendors" << std::endl;
-#endif
     if (gAllVoices != NULL) {
         return newJavaArray<OneCoreVoice::Map::iterator, VoiceVendorToJString>(
             env,
@@ -213,9 +202,6 @@ struct VoiceNameToJString {
     }
 };
 JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceNames(JNIEnv* env, jclass) {
-#if _DEBUG
-    std::wcout << "Getting voice names" << std::endl;
-#endif
     if (gAllVoices != NULL) {
         return newJavaArray<OneCoreVoice::Map::iterator, VoiceNameToJString>(
             env,
@@ -235,9 +221,6 @@ struct VoiceLocaleToJString {
     }
 };
 JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceLocales(JNIEnv* env, jclass) {
-#if _DEBUG
-    std::wcout << "Getting voice locales" << std::endl;
-#endif
     if (gAllVoices != NULL) {
         return newJavaArray<OneCoreVoice::Map::iterator, VoiceLocaleToJString>(
             env,
@@ -258,9 +241,6 @@ struct VoiceGenderToJString {
 };
 JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceGenders(JNIEnv* env, jclass)
 {
-#if _DEBUG
-    std::wcout << "Getting voice genders" << std::endl;
-#endif
     if (gAllVoices != NULL) {
         return newJavaArray<OneCoreVoice::Map::iterator, VoiceGenderToJString>(
             env,
@@ -280,9 +260,6 @@ struct VoiceAgeToJString {
 };
 JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceAges(JNIEnv* env, jclass)
 {
-#if _DEBUG
-    std::wcout << "Getting voice ages" << std::endl;
-#endif
     if (gAllVoices != NULL) {
         return newJavaArray<OneCoreVoice::Map::iterator, VoiceAgeToJString>(
             env,

diff --git a/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/onecorenative/pch.h b/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/onecorenative/pch.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <unknwn.h>
+#include <sstream>
 #include <winrt/Windows.Foundation.h>
 #include <winrt/Windows.Foundation.Collections.h>
 #include <winrt/Windows.Media.SpeechSynthesis.h>

diff --git a/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/sapinative/jni_helper.cpp b/tts/tts-adapters/tts-adapter-sapinative/src/main/jni/sapinative/jni_helper.cpp
@@ -20,4 +20,12 @@ jobjectArray emptyJavaArray(JNIEnv* env, const char* javaClass, int size) {
 	jclass objClass = env->FindClass(javaClass);
 	jobjectArray jArray = env->NewObjectArray(size, objClass, 0);
 	return jArray;
-}
+}
+
+// Raises a java.io.IOException in the calling Java thread.
+//   env     - JNI environment of the current native call
+//   message - exception message as UTF-16 code units (does not need to be NUL-terminated)
+//   len     - number of UTF-16 code units in `message`
+// The class thrown must be java.io.IOException (not java.lang.Exception): the Java
+// native methods are declared `throws IOException` and Java callers catch that type.
+// The exception is only pending when this function returns: the native caller
+// must still return to Java for it to be delivered.
+void raiseIOException(JNIEnv* env, const jchar* message, size_t len) {
+	jclass exceptionClass = env->FindClass("java/io/IOException");
+	if (exceptionClass == NULL) {
+		// FindClass already left its own error pending; no further JNI call is safe here
+		return;
+	}
+	jmethodID constructor = env->GetMethodID(exceptionClass, "<init>", "(Ljava/lang/String;)V");
+	if (constructor == NULL) {
+		// NoSuchMethodError is pending
+		return;
+	}
+	jstring messageJava = env->NewString(message, static_cast<jsize>(len));
+	if (messageJava == NULL) {
+		// OutOfMemoryError is pending
+		return;
+	}
+	jobject except = env->NewObject(exceptionClass, constructor, messageJava);
+	if (except != NULL) {
+		env->Throw(static_cast<jthrowable>(except));
+	}
+}