In [1]:
from PyQt5 import QtWidgets, uic, QtGui, QtMultimedia
from PyQt5.QtWidgets import (QMainWindow, QTextEdit,
                QAction, QFileDialog, QApplication)
from PyQt5.QtCore import QThread, pyqtSignal, QUrl

import mainwindow
import sys
import json
import os
from os.path import join, dirname
from ibm_watson import SpeechToTextV1, LanguageTranslatorV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from google.cloud import speech
from google.cloud import speech_v1p1beta1 as speech
from google.cloud import translate_v2 as translate
import io

In [2]:
class CallWatson(QThread):
    """Speech-to-text and English->Russian translation via IBM Watson or Google Cloud.

    Results are delivered through Qt signals rather than return values:

    * ``throw_results_recognition(list, float)`` - transcript lines (one per
      speaker turn, each prefixed with ``'Speaker N:'``) and a confidence score.
    * ``throw_results_translation(list)`` - the translated lines.

    The last transcript is kept in ``self.text`` so that a later translation
    call can use it.

    Watson credentials and endpoints can be supplied through the environment
    variables ``WATSON_STT_APIKEY``, ``WATSON_STT_URL``,
    ``WATSON_TRANSLATOR_APIKEY`` and ``WATSON_TRANSLATOR_URL``.

    NOTE(review): the class derives from QThread but defines no ``run()``
    override, and nothing in this file calls ``start()``; the ``run_*`` methods
    therefore execute in whichever thread invokes them and block it for the
    duration of the network calls.
    """
    throw_results_recognition = pyqtSignal(list, float)
    throw_results_translation = pyqtSignal(list)

    def __init__(self, file):
        """Remember the path of the audio file to process.

        Args:
            file: path of the audio file (sent to Watson as ``audio/mp3``).
        """
        QThread.__init__(self)
        self.path = file
        self.text = []

    def __del__(self):
        # Block until the thread (if it was ever started) has finished.
        self.wait()

    @staticmethod
    def _group_by_speaker(labelled_words):
        """Merge consecutive words of the same speaker into one line.

        Args:
            labelled_words: iterable of ``(speaker_label, word)`` pairs in
                spoken order.

        Returns:
            list of str: one ``'Speaker <label>: w1 w2 ...'`` entry per
            uninterrupted speaker turn.
        """
        lines = []
        previous = None
        for speaker, word in labelled_words:
            if not lines or speaker != previous:
                lines.append('Speaker ' + str(speaker) + ':')
                previous = speaker
            lines[-1] += ' ' + word
        return lines

    def run_recognition(self):
        """Transcribe ``self.path`` with IBM Watson and emit the result.

        Words from *every* entry of the response's ``results`` list are used
        (Watson returns one entry per utterance, while ``speaker_labels``
        spans the whole audio). The emitted confidence is the mean confidence
        of the best alternative of each result, or ``0.0`` when nothing was
        recognised.
        """
        # SECURITY: the fallback literals below are credentials committed to
        # source control. They are kept only so existing setups keep working;
        # rotate them and provide the values via the environment instead.
        authenticator = IAMAuthenticator(
            os.environ.get('WATSON_STT_APIKEY',
                           'ldfzprXgznF0Ti5cGYrYHRcCTJ57HC5Pzlvt2huCqQ39')
            )
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            os.environ.get(
                'WATSON_STT_URL',
                'https://api.eu-gb.speech-to-text.watson.cloud.ibm.com/instances/011afe6f-1a89-4893-9443-4ab7558f05a6'
                )
            )
        with open(self.path, 'rb') as audio_file:
            speech_recognition_results = speech_to_text.recognize(
                audio=audio_file,
                content_type='audio/mp3',
                speaker_labels=True
            ).get_result()

        # Collect the words (and confidences) of all results, not just the first.
        words = []
        confidences = []
        for result in speech_recognition_results.get('results') or []:
            alternatives = result.get('alternatives') or []
            if not alternatives:
                continue
            best = alternatives[0]
            words.extend(stamp[0] for stamp in best.get('timestamps') or [])
            if 'confidence' in best:
                confidences.append(best['confidence'])

        # Watson numbers speakers from 0; show them to the user starting at 1.
        speakers = [
            label['speaker'] + 1
            for label in speech_recognition_results.get('speaker_labels') or []
            ]
        # zip() stops at the shorter sequence, so a length mismatch between
        # words and labels cannot raise IndexError.
        text = self._group_by_speaker(zip(speakers, words))
        confidence = sum(confidences) / len(confidences) if confidences else 0.0

        self.text = text
        self.throw_results_recognition.emit(text, float(confidence))

    def run_translation(self):
        """Translate ``self.text`` to Russian with IBM Watson and emit the result.

        Emits an empty list without contacting the service when there is no
        recognised text to translate.
        """
        if not self.text:
            self.throw_results_translation.emit([])
            return

        # SECURITY: see the note in run_recognition about the fallback literals.
        authenticator = IAMAuthenticator(
            os.environ.get('WATSON_TRANSLATOR_APIKEY',
                           'jx6Q0eYZWzdZ8N9VcJOXmpubjt7hVpq5yHi-15vsT9sx')
            )
        language_translator = LanguageTranslatorV3(
            version='2018-05-01',
            authenticator=authenticator
            )
        language_translator.set_service_url(
            os.environ.get(
                'WATSON_TRANSLATOR_URL',
                'https://api.eu-gb.language-translator.watson.cloud.ibm.com/instances/07b13a19-da91-434a-90e7-f1957b4f3829'
                )
            )

        translation = language_translator.translate(
            text=self.text,
            model_id='en-ru'
            ).get_result()
        russian_text = [item['translation'] for item in translation['translations']]
        self.throw_results_translation.emit(russian_text)

    def run_google_recognition(self):
        """Transcribe ``self.path`` with Google Cloud Speech and emit the result.

        Emits an empty transcript with confidence ``0.0`` when the service
        returns no results.
        """
        client = speech.SpeechClient()

        with io.open(self.path, 'rb') as audio_file:
            content = audio_file.read()
        audio = speech.RecognitionAudio(content=content)
        # NOTE(review): ENCODING_UNSPECIFIED and the fixed 8000 Hz rate may not
        # match the selected MP3 file - confirm against the Speech API docs.
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
            sample_rate_hertz=8000,
            language_code='en-US',
            enable_speaker_diarization=True
            )
        response = client.recognize(config=config, audio=audio)
        if not response.results:
            self.text = []
            self.throw_results_recognition.emit([], 0.0)
            return

        # The words are read from the last result (presumably the one carrying
        # the speaker tags for the whole audio - confirm against the API docs).
        words_info = response.results[-1].alternatives[0].words
        text = self._group_by_speaker(
            (word_info.speaker_tag, word_info.word) for word_info in words_info
            )
        self.text = text
        self.throw_results_recognition.emit(text, response.results[0].alternatives[0].confidence)

    def run_google_translation(self):
        """Translate ``self.text`` to Russian with Google Translate and emit the result.

        Emits an empty list without contacting the service when there is no
        recognised text to translate.
        """
        if not self.text:
            self.throw_results_translation.emit([])
            return

        translate_client = translate.Client()
        # format_='text' keeps the API from returning HTML entities
        # (e.g. &#39; for an apostrophe) in the translated strings.
        translation = translate_client.translate(
            self.text, target_language='ru', format_='text'
            )
        russian_text = [item['translatedText'] for item in translation]
        self.throw_results_translation.emit(russian_text)

In [3]:
class MainWindow(QMainWindow):
    """Main window: pick an audio file, recognise its speech, translate the text.

    The ``start_recognition`` / ``start_translation`` signals are wired to the
    selected backend method of a ``CallWatson`` object; that object reports
    back through its ``throw_results_*`` signals, which are handled by the
    ``catch_results_*`` methods here.
    """
    start_recognition = pyqtSignal()
    start_translation = pyqtSignal()

    def __init__(self):
        super(MainWindow, self).__init__()
        self.ui = mainwindow.Ui_MainWindow()
        self.ui.setupUi(self)
        self.ui.get_file_button.clicked.connect(self.get_file_name)
        self.ui.recognize_button.clicked.connect(self.call_recognition)
        self.ui.translate_button.clicked.connect(self.call_translation)
        self.initUi()
        self.path = ""
        # Created by call_recognition; None until a recognition has been run.
        self.watson = None

    def initUi(self):
        """Apply the white background and the green button colours."""
        self.setStyleSheet("background-color: white;")
        self.ui.get_file_button.setStyleSheet(
            'QPushButton {background-color: #00a550; color : white};'
            )
        self.ui.recognize_button.setStyleSheet(
            'QPushButton {background-color: #007539; color : white};'
            )
        self.ui.translate_button.setStyleSheet(
            'QPushButton {background-color: #002800; color : white};'
            )

    def _set_buttons_enabled(self, enabled):
        """Enable or disable the three action buttons together."""
        self.ui.get_file_button.setEnabled(enabled)
        self.ui.recognize_button.setEnabled(enabled)
        self.ui.translate_button.setEnabled(enabled)

    @staticmethod
    def _rewire(signal, slot):
        """Make ``slot`` the only receiver connected to ``signal``.

        Connecting again on every button click would otherwise stack up
        duplicate connections and invoke the slot several times per emit.
        """
        try:
            signal.disconnect()
        except TypeError:
            # PyQt raises TypeError when the signal has no connections yet.
            pass
        signal.connect(slot)

    def get_file_name(self):
        """Ask the user for an MP3 file; keep the previous choice on cancel."""
        selected = QFileDialog.getOpenFileName(self, "Open Dialog", "", "*.mp3")[0]
        if selected:
            self.path = selected

    def catch_results_recognition(self, result, confidence):
        """Show the recognised lines and re-enable the buttons.

        Args:
            result: list of transcript lines.
            confidence: recognition confidence (currently not displayed).
        """
        self._set_buttons_enabled(True)
        for line in result:
            self.ui.recognized_text.insertPlainText(line + chr(10))

    def catch_results_translation(self, result):
        """Show the translated lines and re-enable the buttons.

        Args:
            result: list of translated lines.
        """
        self._set_buttons_enabled(True)
        for line in result:
            self.ui.translated_text.insertPlainText(line + chr(10))

    def call_recognition(self):
        """Run speech recognition on the selected file with the chosen backend.

        Does nothing when no file has been selected or no backend radio button
        is checked; the UI is only cleared and disabled once the request is
        known to be runnable, so it can never be left disabled.
        """
        if not self.path:
            return
        use_watson = self.ui.is_watson.isChecked()
        if not use_watson and not self.ui.is_google.isChecked():
            return

        self.ui.recognized_text.clear()
        self.ui.translated_text.clear()
        self._set_buttons_enabled(False)

        self.watson = CallWatson(self.path)
        if use_watson:
            recognizer = self.watson.run_recognition
        else:
            recognizer = self.watson.run_google_recognition
        self._rewire(self.start_recognition, recognizer)
        self._rewire(
            self.watson.throw_results_recognition,
            self.catch_results_recognition
            )
        self.start_recognition.emit()

    def call_translation(self):
        """Translate the last recognised text with the chosen backend.

        Does nothing when no recognition has been run yet or no backend radio
        button is checked.
        """
        if self.watson is None:
            return
        use_watson = self.ui.is_watson.isChecked()
        if not use_watson and not self.ui.is_google.isChecked():
            return

        self.ui.translated_text.clear()
        self._set_buttons_enabled(False)

        if use_watson:
            translator = self.watson.run_translation
        else:
            translator = self.watson.run_google_translation
        self._rewire(self.start_translation, translator)
        self._rewire(
            self.watson.throw_results_translation,
            self.catch_results_translation
            )
        self.start_translation.emit()
    
   

In [None]:
if __name__ == "__main__":
    # Only fall back to the developer's local key file when the environment
    # does not already provide Google credentials, so the notebook also runs
    # on machines where GOOGLE_APPLICATION_CREDENTIALS is configured properly.
    os.environ.setdefault(
        "GOOGLE_APPLICATION_CREDENTIALS",
        r"C:\Users\anna_\Downloads\SpeechToText-0269a9c91cff.json"
        )
    app = QtWidgets.QApplication([])
    application = MainWindow()
    application.show()
    # Hand control to the Qt event loop and exit with its return code.
    sys.exit(app.exec())