Skip to content

Commit

Permalink
fix: use sdk to complete azure text-to-speech
Browse files Browse the repository at this point in the history
fix: use sdk to complete azure text-to-speech
  • Loading branch information
hahahumble committed Apr 21, 2023
2 parents 6e196db + 4f0801a commit 6fb0ae2
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 49 deletions.
2 changes: 1 addition & 1 deletion src/apis/amazonPolly.ts
Expand Up @@ -10,7 +10,7 @@ const speechParams = {
Engine: 'neural',
};

export default async function generateSpeechUrl(
export default async function speechSynthesizeWithPolly(
text: string,
voiceId: string = 'Matthew',
engine: string = 'neural',
Expand Down
55 changes: 23 additions & 32 deletions src/apis/azureTTS.ts
@@ -1,41 +1,32 @@
import axios, { AxiosRequestConfig } from 'axios';
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import { azureSynthesisErrorNotify } from '../components/Notification';

const textToSpeech = async (
const speechSynthesizeWithAzure = async (
subscriptionKey: string,
region: string,
text: string,
voiceName: string,
language: string
) => {
const request: AxiosRequestConfig = {
method: 'POST',
url: `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`,
headers: {
'Content-Type': 'application/ssml+xml',
'X-Microsoft-OutputFormat': 'riff-16khz-16bit-mono-pcm',
Authorization: `Bearer ${await getAccessToken(subscriptionKey, region)}`,
console.time('Azure speech synthesis');
const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, region);
speechConfig.speechRecognitionLanguage = language;
speechConfig.speechSynthesisVoiceName = voiceName;
const player = new sdk.SpeakerAudioDestination();
const audioConfig = sdk.AudioConfig.fromSpeakerOutput(player);
const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
speechSynthesizer.speakTextAsync(
text,
result => {
console.timeEnd('Azure speech synthesis');
speechSynthesizer.close();
},
data: `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${language}'><voice name='${voiceName}'>${text}</voice></speak>`,
responseType: 'arraybuffer',
};

const response = await axios(request);

return new Blob([response.data], { type: 'audio/wav' });
};

const getAccessToken = async (subscriptionKey: string, region: string) => {
const request: AxiosRequestConfig = {
method: 'POST',
url: `https://${region}.api.cognitive.microsoft.com/sts/v1.0/issueToken`,
headers: {
'Ocp-Apim-Subscription-Key': subscriptionKey,
},
};

const response = await axios(request);

return response.data;
error => {
console.log(error);
azureSynthesisErrorNotify();
speechSynthesizer.close();
}
);
return player;
};

export default textToSpeech;
export default speechSynthesizeWithAzure;
18 changes: 18 additions & 0 deletions src/apis/azureToken.ts
@@ -0,0 +1,18 @@
import axios from 'axios';

/**
 * Requests an access token from the Azure Cognitive Services token endpoint
 * (`sts/v1.0/issueToken`) of the given region.
 *
 * The subscription key is sent in the `Ocp-Apim-Subscription-Key` header of an
 * empty-bodied POST request.
 *
 * @param subscriptionKey - Azure subscription key to exchange for a token.
 * @param region - Azure region used to build the endpoint host name (e.g. `eastus`).
 * @returns The response body of the token request, expected to be the token string.
 * @throws Error when the request fails. The message is prefixed with
 *   `Error getting token:` and the original failure is attached as `cause`.
 */
export async function getAzureToken(subscriptionKey: string, region: string): Promise<string> {
  const url = `https://${region}.api.cognitive.microsoft.com/sts/v1.0/issueToken`;

  try {
    const response = await axios.post(url, null, {
      headers: {
        'Ocp-Apim-Subscription-Key': subscriptionKey,
        'Content-Type': 'application/x-www-form-urlencoded',
      },
    });

    return response.data;
  } catch (error) {
    // Keep the original failure reachable (status code, response, stack) instead of
    // reducing it to a string; the message itself is unchanged for existing callers.
    throw Object.assign(new Error(`Error getting token: ${error}`), { cause: error });
  }
}
6 changes: 6 additions & 0 deletions src/components/Notification.tsx
Expand Up @@ -111,6 +111,12 @@ export const azureSynthesisErrorNotify = () => {
});
};

/**
 * Shows an error toast with the translated `notification.invalid-azure-key`
 * message, styled with the shared notification style.
 */
export const invalidAzureKeyNotify = () => {
  const message = i18next.t('notification.invalid-azure-key') as string;
  const options = { style: notificationStyle };
  toast.error(message, options);
};

// AWS
export const awsErrorNotify = () => {
toast.error(i18next.t('notification.polly-synthesis-error') as string, {
Expand Down
1 change: 1 addition & 0 deletions src/locales/en.json
Expand Up @@ -157,6 +157,7 @@
"azure-synthesis-error": "There was an error with Azure speech synthesis",
"azure-recognition-error": "There was an error with Azure speech recognition",
"polly-synthesis-error": "There was an error with Amazon Polly speech synthesis",
"invalid-azure-key": "Invalid Azure key or region",
"cannot-be-empty": "This field cannot be empty",
"invalid-access-code": "Invalid access code"
}
Expand Down
1 change: 1 addition & 0 deletions src/locales/es.json
Expand Up @@ -157,6 +157,7 @@
"azure-synthesis-error": "Se ha producido un error con la síntesis de voz de Azure",
"azure-recognition-error": "Se ha producido un error en el reconocimiento de voz de Azure",
"polly-synthesis-error": "Se ha producido un error con la síntesis de voz de Amazon Polly",
"invalid-azure-key": "Clave o región de Azure no válida",
"cannot-be-empty": "Este campo no puede estar vacío",
"invalid-access-code": "Código de acceso no válido"
}
Expand Down
1 change: 1 addition & 0 deletions src/locales/zh-CN.json
Expand Up @@ -157,6 +157,7 @@
"azure-synthesis-error": "Azure 语音合成错误",
"azure-recognition-error": "Azure 语音识别错误",
"polly-synthesis-error": "Amazon Polly 语音合成错误",
"invalid-azure-key": "无效的 Azure Key 或 Region",
"cannot-be-empty": "不能为空",
"invalid-access-code": "无效的访问密码"
}
Expand Down
1 change: 1 addition & 0 deletions src/pages/Home.tsx
Expand Up @@ -30,6 +30,7 @@ function Home() {
azureRecognitionErrorNotify: Notify.azureRecognitionErrorNotify,
awsErrorNotify: Notify.awsErrorNotify,
emptyAzureKeyNotify: Notify.emptyAzureKeyNotify,
invalidAzureKeyNotify: Notify.invalidAzureKeyNotify,
cannotBeEmptyNotify: Notify.cannotBeEmptyNotify,
invalidAccessCodeNotify: Notify.invalidAccessCodeNotify,
};
Expand Down
40 changes: 24 additions & 16 deletions src/utils/speechSynthesis.ts
@@ -1,5 +1,7 @@
import generateSpeechUrl from '../apis/amazonPolly';
import textToSpeech from '../apis/azureTTS';
import speechSynthesizeWithPolly from '../apis/amazonPolly';
import speechSynthesizeWithAzure from '../apis/azureTTS';
import { SpeakerAudioDestination } from 'microsoft-cognitiveservices-speech-sdk';
import { getAzureToken } from '../apis/azureToken';

interface SpeechSynthesisOptions {
text: string;
Expand All @@ -26,7 +28,7 @@ interface getPollyVoicesOptions {

const synthesis = window.speechSynthesis;
let pollyAudio: HTMLAudioElement | null = null;
let azureAudio: HTMLAudioElement | null = null;
let azureAudio: SpeakerAudioDestination | null = null;

async function getPollyVoices({
text,
Expand All @@ -36,7 +38,7 @@ async function getPollyVoices({
accessKeyId,
secretAccessKey,
}: getPollyVoicesOptions) {
return await generateSpeechUrl(text, voiceName, engine, region, accessKeyId, secretAccessKey);
return await speechSynthesizeWithPolly(text, voiceName, engine, region, accessKeyId, secretAccessKey);
}

function pollyEngineName(engine: string | undefined) {
Expand Down Expand Up @@ -129,18 +131,24 @@ export function speechSynthesis({
});
break;
case 'Azure TTS':
textToSpeech(secretAccessKey || '', region || 'eastus', text, voiceName, language)
.then(audioBlob => {
azureAudio = new Audio(URL.createObjectURL(audioBlob));
azureAudio.play().then(() => {
// resolve();
});
azureAudio.onended = () => {
if (secretAccessKey == '') {
reject('Azure access key is empty');
notify.emptyAzureKeyNotify();
return;
}
// Check if secret access key and region is valid
getAzureToken(secretAccessKey || '', region || 'eastus')
.then(token => {})
.catch(error => {
notify.invalidAzureKeyNotify();
reject(error);
});
speechSynthesizeWithAzure(secretAccessKey || '', region || 'eastus', text, voiceName, language)
.then(player => {
azureAudio = player;
player.onAudioEnd = () => {
resolve();
};
azureAudio.onerror = error => {
reject(error);
};
})
.catch(error => {
console.error(error);
Expand All @@ -164,7 +172,7 @@ export function stopSpeechSynthesis() {
}
if (azureAudio) {
azureAudio.pause();
azureAudio.currentTime = 0;
azureAudio.close();
}
}

Expand Down Expand Up @@ -192,6 +200,6 @@ export function resumeSpeechSynthesis() {
pollyAudio.play();
}
if (azureAudio) {
azureAudio.play();
azureAudio.resume();
}
}

1 comment on commit 6fb0ae2

@vercel
Copy link

@vercel vercel bot commented on 6fb0ae2 Apr 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

speechgpt – ./

speechgpt-hahahumble.vercel.app
speechgpt-alpha.vercel.app
speechgpt-git-main-hahahumble.vercel.app

Please sign in to comment.