Skip to content

Commit

Permalink
Various fixes to tts-adapter-sapinative
Browse files Browse the repository at this point in the history
  • Loading branch information
bertfrees committed Apr 10, 2023
2 parents fde6d5d + e0240a4 commit 9f9547c
Show file tree
Hide file tree
Showing 18 changed files with 302 additions and 242 deletions.
2 changes: 1 addition & 1 deletion bom/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@
<dependency>
<groupId>org.daisy.pipeline.modules</groupId>
<artifactId>tts-adapter-sapinative</artifactId>
<version>3.1.0</version>
<version>3.1.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.daisy.pipeline.modules</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package org.daisy.pipeline.tts.onecore;

import java.io.IOException;

public class Onecore {

public static native long openConnection();

public static native int closeConnection(long connection);

public static native int speak(long connection, String voiceVendor, String voiceName, String text);
public static native int speak(long connection, String voiceVendor, String voiceName, String text) throws IOException;
/* in bytes*/
public static native int getStreamSize(long connection);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
package org.daisy.pipeline.tts.onecore;

import java.io.IOException;

public class SAPI {

public static native long openConnection();
public static native long openConnection() throws IOException;

public static native int closeConnection(long connection);

public static native int speak(long connection, String voiceVendor, String voiceName, String text);
public static native int speak(long connection, String voiceVendor, String voiceName, String text) throws IOException;
/* in bytes*/
public static native int getStreamSize(long connection);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ public SynthesisResult synthesize(XdmNode ssml, Voice voice, TTSResource resourc
}
try {
List<Integer> marks = new ArrayList<>();
AudioInputStream audio = speak(transformSsmlNodeToString(ssml, ssmlTransformer, xsltParams),
voice, resource, marks);
String ssmlForEngine = transformSsmlNodeToString(ssml, ssmlTransformer, xsltParams);
AudioInputStream audio = speak(ssmlForEngine, voice, resource, marks);
return new SynthesisResult(audio, marks);
} catch (IOException|SaxonApiException e) {
throw new SynthesisException(e);
Expand All @@ -93,33 +93,54 @@ public AudioInputStream speak(String ssml, Voice voice, TTSResource resource, Li
voice = mVoiceFormatConverter.get(voice.name.toLowerCase());
ThreadResource tr = (ThreadResource)resource;
if (voice.engine.equals("sapi") ){
int res = SAPI.speak(tr.SAPIConnection, voice.engine, voice.name, ssml);
if (res != SAPIResult.SAPINATIVE_OK.value()) {
throw new SynthesisException("SAPI-legacy speak error " + res + " raised with voice "
+ voice + ": " + SAPIResult.valueOfCode(res)+"\nFor text :"
+ ssml);
try {
int res = SAPI.speak(tr.SAPIConnection, voice.engine, voice.name, ssml);
if (res != SAPIResult.SAPINATIVE_OK.value()) {
throw new SynthesisException("SAPI-legacy speak error " + res + " raised with voice "
+ voice + ": " + SAPIResult.valueOfCode(res)+"\nFor text :"
+ ssml);
}
} catch (RuntimeException e){
Logger.error("SAPI-legacy raised a RUNTIME exception while speaking " + ssml + " with " + voice + " : " + e.getMessage());
throw new SynthesisException("SAPI-legacy raised a RUNTIME exception while speaking " + ssml + " with " + voice, e);
} catch (Exception e){
Logger.error("SAPI-legacy raised an exception while speaking " + ssml + " with " + voice + " : " + e.getMessage());
throw new SynthesisException("SAPI-legacy raised an exception while speaking " + ssml + " with " + voice, e);
}

int size = SAPI.getStreamSize(tr.SAPIConnection);
byte[] data = new byte[size];
SAPI.readStream(tr.SAPIConnection, data, 0);
long[] bookmarksPositions = SAPI.getBookmarkPositions(tr.SAPIConnection);

String[] names = SAPI.getBookmarkNames(tr.SAPIConnection);
long[] positions = SAPI.getBookmarkPositions(tr.SAPIConnection);
float sampleRate = sapiAudioFormat.getSampleRate();
int bytesPerSample = sapiAudioFormat.getSampleSizeInBits() / 8;
for (long position : bookmarksPositions) {
int offset = (int) ((position * sampleRate * bytesPerSample) / 1000);
marks.add(offset);
for (int i = 0; i < names.length; ++i) {
int offset = (int) ((positions[i] * sampleRate * bytesPerSample) / 1000);
// SAPI / OneCore sometimes produce bookmarks with an empty name (for an unknown reason); skip those
if (names[i].length() > 0){
marks.add(offset);
}
}
return createAudioStream(sapiAudioFormat, data);
} else { // use onecore engine
int res = Onecore.speak(tr.onecoreConnection, voice.engine, voice.name, ssml);
if (res != OnecoreResult.SAPINATIVE_OK.value()) {
throw new SynthesisException("SAPI-Onecore speak error " + res + " raised with voice "
+ voice + ": " + OnecoreResult.valueOfCode(res)+"\nFor text :"
+ ssml);
try {
int res = Onecore.speak(tr.onecoreConnection, voice.engine, voice.name, ssml);
if (res != OnecoreResult.SAPINATIVE_OK.value()) {
throw new SynthesisException("SAPI-Onecore speak error " + res + " raised with voice "
+ voice + ": " + OnecoreResult.valueOfCode(res)+"\nFor text :"
+ ssml);
}
} catch (IOException e) {
Logger.error("SAPI-onecore raised an exception while speaking " + ssml + " with " + voice + " : " + e.getMessage());
throw new SynthesisException("SAPI-Onecore raised an exception while speaking " + ssml + " with " + voice, e);
}

int size = Onecore.getStreamSize(tr.onecoreConnection);
byte[] data = new byte[size];
Onecore.readStream(tr.onecoreConnection, data, 0);
String[] names = Onecore.getBookmarkNames(tr.onecoreConnection);
long[] pos = Onecore.getBookmarkPositions(tr.onecoreConnection);
AudioInputStream result;
try {
Expand All @@ -130,9 +151,12 @@ public AudioInputStream speak(String ssml, Voice voice, TTSResource resource, Li
AudioFormat resultFormat = result.getFormat();
float sampleRate = resultFormat.getSampleRate();
int bytesPerSample = resultFormat.getSampleSizeInBits() / 8;
for (long po : pos) {
int offset = (int) ((po * sampleRate * bytesPerSample) / 1000);
marks.add(offset);
for (int i = 0; i < names.length; ++i) {
int offset = (int) ((pos[i] * sampleRate * bytesPerSample) / 1000);
// SAPI / OneCore sometimes produce bookmarks with an empty name (for an unknown reason); skip those
if (names[i].length() > 0){
marks.add(offset);
}
}
return result;
}
Expand All @@ -149,11 +173,17 @@ public TTSResource allocateThreadResources() throws SynthesisException {
tr.onecoreConnection = connection;
}
if (this.sapiAudioFormat != null){
long connection = SAPI.openConnection();
if (connection == 0) {
throw new SynthesisException("could not open SAPI-Onecore context.");
try {
long connection = SAPI.openConnection();
if (connection == 0) {
throw new IOException("could not connect to SAPI-Legacy context.");
}
tr.SAPIConnection = connection;
} catch (IOException e) {
throw new SynthesisException("could not open SAPI-Legacy context.", e);
}
tr.SAPIConnection = connection;


}
return tr;
}
Expand Down Expand Up @@ -224,8 +254,14 @@ public Collection<Voice> getAvailableVoices() {
// Note that since onecore voice are added after sapi,
// they are overwriting matching sapi voices to avoid duplicates
try {
// Strip the " desktop" suffix from the names of legacy SAPI Microsoft voices
// so that the matching OneCore voice, if available, is used instead
String key = names.get(i).toLowerCase();
if (key.endsWith(" desktop")) {
key = key.substring(0,key.length() - " desktop".length());
}
mVoiceFormatConverter.put(
names.get(i).toLowerCase(),
key,
new Voice(
vendors.get(i),
names.get(i),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,6 @@ jobjectArray newJavaArray(JNIEnv* env, Iterator items, size_t size, const char*
return jArray;
}

void raiseIOException(JNIEnv* env, const jchar* message, size_t len);

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,12 @@ jobjectArray emptyJavaArray(JNIEnv* env, const char* javaClass, int size) {
jclass objClass = env->FindClass(javaClass);
jobjectArray jArray = env->NewObjectArray(size, objClass, 0);
return jArray;
}
}

// Raises a java.io.IOException in the calling Java thread.
//
// env:     JNI environment of the current thread.
// message: UTF-16 code units of the exception message (not necessarily NUL-terminated).
// len:     number of UTF-16 code units in `message`.
//
// The exception only becomes visible to Java once the native method returns,
// so the caller must return right after calling this function.
// If any JNI lookup/allocation fails, the JVM has already set a pending
// exception (e.g. NoClassDefFoundError, OutOfMemoryError); in that case we
// return immediately, because most JNI calls are not allowed while an
// exception is pending.
void raiseIOException(JNIEnv* env, const jchar* message, size_t len ) {
jclass exceptionClass = env->FindClass("java/io/IOException");
if (exceptionClass == NULL) {
return;
}
jmethodID constructor = env->GetMethodID(exceptionClass, "<init>", "(Ljava/lang/String;)V");
if (constructor == NULL) {
return;
}
// NewString expects a jsize (signed 32-bit) length, make the narrowing explicit
jstring messageJava = env->NewString(message, static_cast<jsize>(len));
if (messageJava == NULL) {
return;
}
jobject except = env->NewObject(exceptionClass, constructor, messageJava);
if (except == NULL) {
return;
}
env->Throw(static_cast<jthrowable>(except));
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@ ConnectionsRegistry* openedConnection = NULL;
///////////////////////////////////////

JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_initialize(JNIEnv* env, jclass) {
#if _DEBUG
std::wcout << "Initializing Onecore" << std::endl;
#endif
gAllVoices = new OneCoreVoice::Map();
winrtConnection temp = winrtConnection();
for each (auto rawVoice in temp.voices())
Expand All @@ -59,7 +56,7 @@ JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_initialize(JN
voice
));
}
openedConnection = new ConnectionsRegistry();
openedConnection = new ConnectionsRegistry(1024);
return SAPI_OK;
}

Expand All @@ -72,9 +69,6 @@ JNIEXPORT jlong JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_openConnecti
}

JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_closeConnection(JNIEnv*, jclass, jlong connection) {
#if _DEBUG
std::wcout << "Closing onecore connection " << connection << std::endl;
#endif
Connection* conn = reinterpret_cast<Connection*>(connection);
if (conn != NULL) {
delete conn;
Expand All @@ -91,16 +85,10 @@ JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_closeConnecti


JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_dispose(JNIEnv*, jclass) {
#if _DEBUG
std::wcout << "Disposing of Onecore" << std::endl;
#endif
// Close remaining connections
if (openedConnection != NULL) {

for (ConnectionsRegistry::iterator it = openedConnection->begin(); it != openedConnection->end(); ++it) {
#if _DEBUG
std::wcout << "- Cleaning onecore connection " << *it << std::endl;
#endif
Connection* conn = reinterpret_cast<Connection*>(*it);
delete conn;
}
Expand Down Expand Up @@ -135,25 +123,29 @@ JNIEXPORT jint JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_speak(JNIEnv*
if (!(convertToUTF16(env, text, conn->sentence, MAX_SENTENCE_SIZE)))
return TOO_LONG_TEXT;

#if _DEBUG
std::wcout << it->second.name << " speaking " << conn->sentence << std::endl;
#endif
// VoiceInformation seems to create an exception, so we use the voice display name for now
winrt::hstring ssmltext = winrt::hstring(conn->sentence);
winrt::hstring foundVoiceName = it->second.rawVoice;
// VoiceInformation seems to create an exception, so we use the voice display name for now
winrt::hstring ssmltext = winrt::hstring(conn->sentence);
winrt::hstring foundVoiceName = it->second.rawVoice;

try {
conn->streamData = conn->onecore.speak(ssmltext, foundVoiceName);
conn->marksNames = conn->onecore.marksNames();
conn->marksPositions = conn->onecore.marksPositions();
}
catch (winrt::hresult_error const& ex)
{
winrt::hresult hr = ex.code();
winrt::hstring message = ex.message();
std::wcout << "Exception raised while speaking " << conn->sentence << std::endl << "With voice " << it->second.name << " : " << std::endl;
std::cout << message.c_str() << std::endl;
}
try {
conn->streamData = conn->onecore.speak(ssmltext, foundVoiceName);
conn->marksNames = conn->onecore.marksNames();
conn->marksPositions = conn->onecore.marksPositions();
}
catch (winrt::hresult_error const& ex)
{

winrt::hresult hr = ex.code();
std::wstring message = std::wstring(ex.message().c_str());
std::wstring sentence = std::wstring(conn->sentence);
std::wostringstream excep;
excep << L"Error code (0x" << std::hex << hr.value << L") raised when trying to speak with OneCore SAPI" << std::endl;
excep << message << std::endl;
// Use exception instead of return result to get error code in java
raiseIOException(env, (const jchar*)excep.str().c_str(), excep.str().size());
return COULD_NOT_SPEAK;

}

return SAPI_OK;
}
Expand Down Expand Up @@ -191,9 +183,6 @@ struct VoiceVendorToJString {
}
};
JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceVendors(JNIEnv* env, jclass) {
#if _DEBUG
std::wcout << "Getting voice vendors" << std::endl;
#endif
if (gAllVoices != NULL) {
return newJavaArray<OneCoreVoice::Map::iterator, VoiceVendorToJString>(
env,
Expand All @@ -213,9 +202,6 @@ struct VoiceNameToJString {
}
};
JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceNames(JNIEnv* env, jclass) {
#if _DEBUG
std::wcout << "Getting voice names" << std::endl;
#endif
if (gAllVoices != NULL) {
return newJavaArray<OneCoreVoice::Map::iterator, VoiceNameToJString>(
env,
Expand All @@ -235,9 +221,6 @@ struct VoiceLocaleToJString {
}
};
JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceLocales(JNIEnv* env, jclass) {
#if _DEBUG
std::wcout << "Getting voice locales" << std::endl;
#endif
if (gAllVoices != NULL) {
return newJavaArray<OneCoreVoice::Map::iterator, VoiceLocaleToJString>(
env,
Expand All @@ -258,9 +241,6 @@ struct VoiceGenderToJString {
};
JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceGenders(JNIEnv* env, jclass)
{
#if _DEBUG
std::wcout << "Getting voice genders" << std::endl;
#endif
if (gAllVoices != NULL) {
return newJavaArray<OneCoreVoice::Map::iterator, VoiceGenderToJString>(
env,
Expand All @@ -280,9 +260,6 @@ struct VoiceAgeToJString {
};
JNIEXPORT jobjectArray JNICALL Java_org_daisy_pipeline_tts_onecore_Onecore_getVoiceAges(JNIEnv* env, jclass)
{
#if _DEBUG
std::wcout << "Getting voice ages" << std::endl;
#endif
if (gAllVoices != NULL) {
return newJavaArray<OneCoreVoice::Map::iterator, VoiceAgeToJString>(
env,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <unknwn.h>
#include <sstream>
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.Foundation.Collections.h>
#include <winrt/Windows.Media.SpeechSynthesis.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,12 @@ jobjectArray emptyJavaArray(JNIEnv* env, const char* javaClass, int size) {
jclass objClass = env->FindClass(javaClass);
jobjectArray jArray = env->NewObjectArray(size, objClass, 0);
return jArray;
}
}

// Raises a java.io.IOException in the calling Java thread.
//
// env:     JNI environment of the current thread.
// message: UTF-16 code units of the exception message (not necessarily NUL-terminated).
// len:     number of UTF-16 code units in `message`.
//
// The thrown class must be java.io.IOException (not java.lang.Exception):
// the Java native methods are declared `throws IOException` and callers
// catch IOException, so a plain Exception would escape those handlers.
//
// The exception only becomes visible to Java once the native method returns,
// so the caller must return right after calling this function.
// If any JNI lookup/allocation fails, the JVM has already set a pending
// exception (e.g. NoClassDefFoundError, OutOfMemoryError); in that case we
// return immediately, because most JNI calls are not allowed while an
// exception is pending.
void raiseIOException(JNIEnv* env, const jchar* message, size_t len) {
jclass exceptionClass = env->FindClass("java/io/IOException");
if (exceptionClass == NULL) {
return;
}
jmethodID constructor = env->GetMethodID(exceptionClass, "<init>", "(Ljava/lang/String;)V");
if (constructor == NULL) {
return;
}
// NewString expects a jsize (signed 32-bit) length, make the narrowing explicit
jstring messageJava = env->NewString(message, static_cast<jsize>(len));
if (messageJava == NULL) {
return;
}
jobject except = env->NewObject(exceptionClass, constructor, messageJava);
if (except == NULL) {
return;
}
env->Throw(static_cast<jthrowable>(except));
}
Loading

0 comments on commit 9f9547c

Please sign in to comment.