In [1]:
from PyQt5 import QtWidgets, uic, QtGui, QtMultimedia
from PyQt5.QtWidgets import (QMainWindow, QTextEdit,
                QAction, QFileDialog, QApplication)
from PyQt5.QtCore import QThread, pyqtSignal, QUrl

import mainwindow
import sys
import json
import os
from os.path import join, dirname
from ibm_watson import SpeechToTextV1, LanguageTranslatorV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from google.cloud import speech
from google.cloud import speech_v1p1beta1 as speech
from google.cloud import translate_v2 as translate
import io
import urllib.request
import requests
import pyglet

In [2]:
class CallWatson(QThread):
    """Speech pipeline worker: recognise English audio, translate it to Russian, synthesise speech.

    Every ``run_*`` method talks to one cloud provider (IBM Watson, Google
    Cloud or Yandex Cloud) and reports its outcome through one of the Qt
    signals declared below.

    Although the class derives from ``QThread`` it does not override
    ``run()``; each ``run_*`` method therefore executes in whichever thread
    invokes it.

    Credentials are read from the environment first (``WATSON_STT_APIKEY``,
    ``WATSON_TRANSLATOR_APIKEY``, ``YANDEX_FOLDER_ID``, ``YANDEX_IAM_TOKEN``)
    and fall back to the values embedded in this class, so existing setups
    keep working.
    """

    # Emitted with (list of transcript strings, confidence) after recognition.
    throw_results_recognition = pyqtSignal(list, float)
    # Emitted with the list of translated strings.
    throw_results_translation = pyqtSignal(list)
    # Emitted once the synthesised audio has been written to ``russian_path``.
    throw_results_tts = pyqtSignal()
    # Emitted by ``play_sound`` right after playback has been requested.
    play_over = pyqtSignal()

    # Service endpoints.
    WATSON_STT_URL = 'https://api.eu-gb.speech-to-text.watson.cloud.ibm.com/instances/011afe6f-1a89-4893-9443-4ab7558f05a6'
    WATSON_TRANSLATOR_URL = 'https://api.eu-gb.language-translator.watson.cloud.ibm.com/instances/07b13a19-da91-434a-90e7-f1957b4f3829'
    YANDEX_STT_URL = "https://stt.api.cloud.yandex.net/speech/v1/stt:recognize?%s"
    YANDEX_TRANSLATE_URL = 'https://translate.api.cloud.yandex.net/translate/v2/translate'
    YANDEX_TTS_URL = 'https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize'

    # SECURITY: these fallbacks are secrets committed to source. They are kept
    # only for backward compatibility; rotate them and supply fresh values
    # through the environment variables named in the class docstring.
    # NOTE(review): Yandex IAM tokens are short-lived, so the embedded token
    # has very likely expired already - confirm and drop it.
    _DEFAULT_WATSON_STT_APIKEY = 'ldfzprXgznF0Ti5cGYrYHRcCTJ57HC5Pzlvt2huCqQ39'
    _DEFAULT_WATSON_TRANSLATOR_APIKEY = 'jx6Q0eYZWzdZ8N9VcJOXmpubjt7hVpq5yHi-15vsT9sx'
    _DEFAULT_YANDEX_FOLDER_ID = "b1gtbdqecqnfk26eabio"
    _DEFAULT_YANDEX_IAM_TOKEN = "t1.9euelZqJyM7NyM2akseWy4-KjpWQye3rnpWajI3PmpLOlsqVl4qYjZ7Gzsbl8_cGQlkA-u8PBnhp_N3z90ZwVgD67w8GeGn8.kiydOoy3zYwHPkszImR-SIAFD8CwOwiRMZ5Tca7uXY1dnsCpP-pakXjLtFOR0IWUJdFfcfn5jXtywI0fY6LDCw"

    # The Yandex recogniser's reply is only read for its "result" field, so a
    # fixed placeholder confidence is reported for it.
    YANDEX_PLACEHOLDER_CONFIDENCE = 0.90

    def __init__(self, file):
        """Create a worker for the audio file located at ``file``."""
        QThread.__init__(self)
        self.path = file          # input audio file
        self.russian_path = ''    # output file for synthesised speech
        self.text = []            # recognised (English) lines
        self.russian_text = []    # translated (Russian) lines

    def __del__(self):
        # Block until the underlying thread (if it was ever started) finishes.
        self.wait()

    def get_string(self, input_text):
        """Concatenate ``input_text`` items, each followed by a single space.

        The trailing space after the last item is intentional: it matches
        what callers have always received.
        """
        return ''.join(item + ' ' for item in input_text)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _yandex_credentials(self):
        """Return ``(folder_id, iam_token)``, preferring the environment."""
        folder_id = os.environ.get('YANDEX_FOLDER_ID', self._DEFAULT_YANDEX_FOLDER_ID)
        iam_token = os.environ.get('YANDEX_IAM_TOKEN', self._DEFAULT_YANDEX_IAM_TOKEN)
        return folder_id, iam_token

    def _publish_recognition(self, transcript, confidence):
        """Store the transcript and emit ``throw_results_recognition``.

        ``transcript`` is ``None`` when the service returned nothing usable;
        in that case an empty list and a confidence of 0.0 are emitted so
        that listeners are still notified.
        """
        if transcript is None:
            self.text = []
            confidence = 0.0
        else:
            self.text = [transcript]
        self.throw_results_recognition.emit(self.text, confidence)

    def _publish_translation(self, lines):
        """Store the translated lines and emit ``throw_results_translation``."""
        self.russian_text = lines
        self.throw_results_translation.emit(lines)

    # ------------------------------------------------------------------
    # IBM Watson
    # ------------------------------------------------------------------
    def run_recognition(self):
        """Recognise ``self.path`` (Ogg audio) with Watson Speech to Text."""
        authenticator = IAMAuthenticator(
            os.environ.get('WATSON_STT_APIKEY', self._DEFAULT_WATSON_STT_APIKEY)
        )
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(self.WATSON_STT_URL)

        with open(self.path, 'rb') as audio_file:
            reply = speech_to_text.recognize(
                audio=audio_file,
                content_type='audio/ogg',
                speaker_labels=True
                ).get_result()

        # Only the best alternative of the first result is used.
        # NOTE(review): any further entries in 'results' are ignored, as before.
        results = reply.get('results') or []
        if results and results[0].get('alternatives'):
            best = results[0]['alternatives'][0]
            self._publish_recognition(best['transcript'], best.get('confidence', 0.0))
        else:
            # No speech recognised: report an empty result instead of raising IndexError.
            self._publish_recognition(None, 0.0)

    def run_translation(self):
        """Translate ``self.text`` from English to Russian with Watson Language Translator."""
        if not self.text:
            # Nothing to translate; still notify listeners.
            self._publish_translation([])
            return

        authenticator = IAMAuthenticator(
            os.environ.get('WATSON_TRANSLATOR_APIKEY', self._DEFAULT_WATSON_TRANSLATOR_APIKEY)
        )
        language_translator = LanguageTranslatorV3(
            version='2018-05-01',
            authenticator=authenticator
            )
        language_translator.set_service_url(self.WATSON_TRANSLATOR_URL)

        translation = language_translator.translate(
            text=self.text,
            model_id='en-ru'
            ).get_result()
        self._publish_translation(
            [entry['translation'] for entry in translation['translations']]
        )

    # ------------------------------------------------------------------
    # Google Cloud
    # ------------------------------------------------------------------
    def run_google_recognition(self):
        """Recognise ``self.path`` (sent as Ogg/Opus, 24 kHz, en-US) with Google Speech-to-Text."""
        client = speech.SpeechClient()

        with open(self.path, 'rb') as audio_file:
            content = audio_file.read()

        audio = speech.RecognitionAudio(content=content)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.OGG_OPUS,
            sample_rate_hertz=24000,
            language_code='en-US',
            )

        response = client.recognize(config=config, audio=audio)
        # Only the best alternative of the first result is used, as before.
        if response.results and response.results[0].alternatives:
            best = response.results[0].alternatives[0]
            self._publish_recognition(best.transcript, best.confidence)
        else:
            # No speech recognised: report an empty result instead of raising IndexError.
            self._publish_recognition(None, 0.0)

    def run_google_translation(self):
        """Translate ``self.text`` to Russian with Google Cloud Translation."""
        if not self.text:
            self._publish_translation([])
            return

        translate_client = translate.Client()
        translation = translate_client.translate(self.text, target_language='ru')
        self._publish_translation([entry['translatedText'] for entry in translation])

    def run_google_tts(self):
        """Synthesise ``self.russian_text`` to ``self.russian_path`` (Ogg/Opus) with Google TTS."""
        # Imported here because the module-level imports do not include it.
        from google.cloud import texttospeech

        text = self.get_string(self.russian_text)

        client = texttospeech.TextToSpeechClient()
        synthesis_input = texttospeech.SynthesisInput(text=text)
        voice = texttospeech.VoiceSelectionParams(
            language_code="ru-RU", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
            )
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.OGG_OPUS
            )

        response = client.synthesize_speech(
            input=synthesis_input, voice=voice, audio_config=audio_config
            )

        with open(self.russian_path, "wb") as out:
            out.write(response.audio_content)
        self.throw_results_tts.emit()

    # ------------------------------------------------------------------
    # Yandex Cloud
    # ------------------------------------------------------------------
    def run_yandex_recognition(self):
        """Recognise ``self.path`` with the Yandex SpeechKit short-audio endpoint."""
        folder_id, iam_token = self._yandex_credentials()
        with open(self.path, "rb") as audio_file:
            payload = audio_file.read()

        query = "&".join([
            "topic=general",
            "folderId=%s" % folder_id,
            "lang=en-US"])
        request = urllib.request.Request(self.YANDEX_STT_URL % query, data=payload)
        request.add_header("Authorization", "Bearer %s" % iam_token)

        # Close the HTTP response deterministically once it has been read.
        with urllib.request.urlopen(request) as reply:
            decoded = json.loads(reply.read().decode('UTF-8'))

        # A missing "result" is reported as an empty transcript rather than
        # as [None], which listeners cannot concatenate with text.
        self._publish_recognition(decoded.get("result"), self.YANDEX_PLACEHOLDER_CONFIDENCE)

    def run_yandex_translation(self):
        """Translate ``self.text`` to Russian with the Yandex Translate API."""
        if not self.text:
            self._publish_translation([])
            return

        folder_id, iam_token = self._yandex_credentials()
        headers = {'Content-Type': 'application/json', 'Authorization': 'Bearer ' + iam_token}
        body = {"folder_id": folder_id, "texts": self.text, "targetLanguageCode": "ru"}

        # Send the JSON body directly; no temporary file (and no leaked
        # file handle) is needed.
        resp = requests.post(self.YANDEX_TRANSLATE_URL, headers=headers, json=body)
        # Fail with the HTTP error itself rather than a KeyError further down.
        resp.raise_for_status()

        # Keep every returned translation, consistent with the other providers.
        self._publish_translation(
            [entry['text'] for entry in resp.json()['translations']]
        )

    def yandex_request(self, text):
        """Yield chunks of audio synthesised from ``text`` by Yandex SpeechKit.

        Raises ``requests.HTTPError`` on a non-success status so that an
        error body is never written out as if it were audio.
        """
        folder_id, iam_token = self._yandex_credentials()
        headers = {
            'Authorization': 'Bearer ' + iam_token,
            }
        form = {
            'text': text,
            'lang': 'ru-RU',
            'folderId': folder_id
            }

        with requests.post(self.YANDEX_TTS_URL, headers=headers, data=form, stream=True) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_content(chunk_size=None):
                yield chunk

    def run_yandex_tts(self):
        """Synthesise ``self.russian_text`` to ``self.russian_path`` with Yandex SpeechKit."""
        text = self.get_string(self.russian_text)
        with open(self.russian_path, "wb") as out:
            for audio_content in self.yandex_request(text):
                out.write(audio_content)
        self.throw_results_tts.emit()

    # ------------------------------------------------------------------
    # Playback
    # ------------------------------------------------------------------
    def play_sound(self):
        """Start playing ``self.russian_path`` through pyglet and emit ``play_over``.

        NOTE(review): ``play_over`` is emitted immediately after ``play()``
        is called, not after any end-of-playback notification - confirm this
        is the intended meaning of the signal.
        """
        song = pyglet.media.load(self.russian_path)
        song.play()
        self.play_over.emit()

In [3]:
class MainWindow(QMainWindow):
    """Main application window: pick an audio file, then recognise, translate and vocalise it.

    The window drives a ``CallWatson`` worker through the four ``start_*``
    signals and receives the worker's results through its ``throw_results_*``
    and ``play_over`` signals.
    """

    start_recognition = pyqtSignal()
    start_translation = pyqtSignal()
    start_tts = pyqtSignal()
    start_play = pyqtSignal()

    def __init__(self):
        super(MainWindow, self).__init__()
        self.ui = mainwindow.Ui_MainWindow()
        self.ui.setupUi(self)

        # State is initialised before any slot can run.
        self.path = ""       # audio file chosen by the user
        self.watson = None   # worker; created by the first recognition request

        self.ui.get_file_button.clicked.connect(self.get_file_name)
        self.ui.recognize_button.clicked.connect(self.call_recognition)
        self.ui.translate_button.clicked.connect(self.call_translation)
        self.ui.vocalize_button.clicked.connect(self.call_tts)
        self.initUi()

    def initUi(self):
        """Apply the colour scheme to the window and its buttons."""
        self.setStyleSheet("background-color: white;")
        styles = (
            (self.ui.get_file_button,
             'QPushButton {background-color: #00a550; color : white};'),
            (self.ui.recognize_button,
             'QPushButton {background-color: #007539; color : white};'),
            (self.ui.translate_button,
             'QPushButton {background-color: #004725; color : white};'),
            (self.ui.vocalize_button,
             'QPushButton {background-color: #002800; color : white};'),
        )
        for button, sheet in styles:
            button.setStyleSheet(sheet)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _set_controls_enabled(self, enabled):
        """Enable or disable the file / recognise / translate buttons together."""
        for button in (self.ui.get_file_button,
                       self.ui.recognize_button,
                       self.ui.translate_button):
            button.setEnabled(enabled)

    @staticmethod
    def _rewire(signal, slot):
        """Make ``slot`` the only receiver of ``signal``.

        Connections accumulate across ``connect()`` calls, so without this
        the Nth click would invoke the slot N times (and slots belonging to a
        previously selected provider would keep firing).
        """
        try:
            signal.disconnect()
        except TypeError:
            # PyQt raises TypeError when the signal has no connections yet.
            pass
        signal.connect(slot)

    def _selected_provider(self):
        """Return 'watson', 'google' or 'yandex' for the checked option, else ``None``."""
        if self.ui.is_watson.isChecked():
            return 'watson'
        if self.ui.is_google.isChecked():
            return 'google'
        if self.ui.is_yandex.isChecked():
            return 'yandex'
        return None

    def _show_lines(self, widget, lines):
        """Re-enable the controls and append each line to ``widget``."""
        self._set_controls_enabled(True)
        for line in lines:
            widget.insertPlainText(line + "\n")

    # ------------------------------------------------------------------
    # Slots
    # ------------------------------------------------------------------
    def get_file_name(self):
        """Ask for the input audio file; a cancelled dialog keeps the previous choice."""
        chosen = QFileDialog.getOpenFileName(self, "Open Dialog", "", "*.*")[0]
        if chosen:
            self.path = chosen

    def catch_results_recognition(self, result, confidence):
        """Show recognised lines; ``confidence`` is received but not displayed."""
        self._show_lines(self.ui.recognized_text, result)

    def catch_results_translation(self, result):
        """Show translated lines."""
        self._show_lines(self.ui.translated_text, result)

    def catch_results_tts(self, file_name):
        """Placeholder slot; nothing in this class connects to it."""
        pass

    def call_recognition(self):
        """Run speech recognition on the chosen file with the selected provider."""
        provider = self._selected_provider()
        # Validate before touching the UI so an early return cannot leave
        # the buttons disabled.
        if provider is None or not self.path:
            return

        self.ui.recognized_text.clear()
        self.ui.translated_text.clear()
        self._set_controls_enabled(False)

        worker = CallWatson(self.path)
        slot = {
            'watson': worker.run_recognition,
            'google': worker.run_google_recognition,
            'yandex': worker.run_yandex_recognition,
        }[provider]
        self._rewire(self.start_recognition, slot)
        self._rewire(worker.throw_results_recognition, self.catch_results_recognition)
        # Replace the previous worker only after the old connections are gone.
        self.watson = worker
        self.start_recognition.emit()

    def call_translation(self):
        """Translate the recognised text with the selected provider."""
        provider = self._selected_provider()
        # Translation needs the worker created by a prior recognition run.
        if self.watson is None or provider is None:
            return

        self.ui.translated_text.clear()
        self._set_controls_enabled(False)

        slot = {
            'watson': self.watson.run_translation,
            'google': self.watson.run_google_translation,
            'yandex': self.watson.run_yandex_translation,
        }[provider]
        self._rewire(self.start_translation, slot)
        self._rewire(self.watson.throw_results_translation, self.catch_results_translation)
        self.start_translation.emit()

    def call_tts(self):
        """Synthesise the translated text to a user-chosen ``.ogg`` file, then play it."""
        if self.watson is None:
            return

        # Only Google and Yandex offer synthesis here; check before opening
        # the dialog so an unsupported choice is a no-op.
        if self.ui.is_google.isChecked():
            slot = self.watson.run_google_tts
        elif self.ui.is_yandex.isChecked():
            slot = self.watson.run_yandex_tts
        else:
            return

        filename, _ = QFileDialog.getSaveFileName(
            self, "Save audio file", '', "Audio Files (*.ogg)"
            )
        if filename == '':
            # Dialog cancelled: nothing was disabled yet, so just leave.
            return

        self._set_controls_enabled(False)
        self.watson.russian_path = filename
        self._rewire(self.start_tts, slot)
        self._rewire(self.watson.throw_results_tts, self.play_audio)
        self.start_tts.emit()

    def play_audio(self):
        """Play the file that the synthesis step has just written."""
        self._set_controls_enabled(False)
        self._rewire(self.start_play, self.watson.play_sound)
        self._rewire(self.watson.play_over, self.end_play)
        self.start_play.emit()

    def end_play(self):
        """Re-enable the controls once the worker signals ``play_over``."""
        self._set_controls_enabled(True)

In [None]:
if __name__ == "__main__":
    # Service-account key file used by the Google Cloud client libraries
    # (path is relative to the current working directory).
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"text-to-speech-9ecdd456fe20.json"

    app = QtWidgets.QApplication([])
    application = MainWindow()
    application.show()

    # Run the Qt event loop and hand its return code to the interpreter.
    exit_code = app.exec()
    sys.exit(exit_code)

