In [None]:
import speech_recognition as sr


In [None]:
class ModelVoice2Text(object):
    def __init__(self, input_type='mic',input_file=None, recording_limit=10):
        self.recognizer = sr.Recognizer()
        if input_type == 'mic':
            self.microphone = sr.Microphone()
            self.audiofile = None
        else:
            self.filepath = input_file
            self.audiofile = sr.AudioFile(self.filepath)
        self.response = None
        self.recording_timeout = recording_limit
        #self.soundfile = sf
        #self.sounddevice = sd
        
        
    def reconocer_desde_mic(self):
        """Transcribe speech from recorded from `microphone`.

        Returns a dictionary with three keys:
        "success": a boolean indicating whether or not the API request was
                   successful
        "error":   `None` if no error occured, otherwise a string containing
                   an error message if the API could not be reached or
                   speech was unrecognizable
        "transcription": `None` if speech could not be transcribed,
                   otherwise a string containing the transcribed text
        """
        # check that recognizer and microphone arguments are appropriate type
        if not isinstance(self.recognizer, sr.Recognizer):
            raise TypeError("`recognizer` must be `Recognizer` instance")

        if not isinstance(self.microphone, sr.Microphone):
            raise TypeError("`microphone` must be `Microphone` instance")

        # adjust the recognizer sensitivity to ambient noise and record audio
        # from the microphone
        with self.microphone as source:
            print('Comience a hablar')
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.listen(source)

        # set up the response object
        self.response = {
            "success": True,
            "error": None,
            "transcription": None
        }

        # try recognizing the speech in the recording
        # if a RequestError or UnknownValueError exception is caught,
        #     update the response object accordingly
        try:
            self.response["transcription"] = self.recognizer.recognize_google(audio, language='es-AR')
            #response["transcription"] = self.recognizer.recognize_wit(audio#)
        except sr.RequestError:
            # API was unreachable or unresponsive
            self.response["success"] = False
            self.response["error"] = "API unavailable"
        except sr.UnknownValueError:
            # speech was unintelligible
            self.response["error"] = "Unable to recognize speech"

        return self.response


    def reconocer_desde_wav(self):
        """Transcribe speech from recorded from `microphone`.

        Returns a dictionary with three keys:
        "success": a boolean indicating whether or not the API request was
                   successful
        "error":   `None` if no error occured, otherwise a string containing
                   an error message if the API could not be reached or
                   speech was unrecognizable
        "transcription": `None` if speech could not be transcribed,
                   otherwise a string containing the transcribed text
        """
        # check that recognizer and microphone arguments are appropriate type
        if not isinstance(self.recognizer, sr.Recognizer):
            raise TypeError("`recognizer` must be `Recognizer` instance")

        if not isinstance(self.audiofile, sr.AudioFile):
            raise TypeError("`microphone` must be `Microphone` instance")

        # adjust the recognizer sensitivity to ambient noise and record audio
        # from the microphone
        print('Escuchando audio {}...'.format(self.filepath))
        self.escuchar_audio(self.filepath)
        with self.audiofile as source:
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.record(source)

        # set up the response object
        self.response = {
            "success": True,
            "error": None,
            "transcription": None
        }

        # try recognizing the speech in the recording
        # if a RequestError or UnknownValueError exception is caught,
        #     update the response object accordingly
        try:
            self.response["transcription"] = self.recognizer.recognize_google(audio, language='es-AR')
            #self.response["transcription"] = self.recognizer.recognize_wit(audio, key='5YWGEQ6XHUUYCG6ZOVHOSIVJYT2MYGJW')
        except sr.RequestError:
            # API was unreachable or unresponsive
            self.response["success"] = False
            self.response["error"] = "API unavailable"
        except sr.UnknownValueError:
            # speech was unintelligible
            self.response["error"] = "Unable to recognize speech"

        return self.response
    
    def grabar_nuevo_audio(self, filename='audiotest.wav'):
        
        with  self.microphone as source:
            print("Hable!")
            self.recognizer.adjust_for_ambient_noise(source)
            audio =  self.recognizer.listen(source, phrase_time_limit=self.recording_timeout)
            
        # write audio to a WAV file
        with open(filename, "wb") as f:
            f.write(audio.get_wav_data())
        f.close()
        
    def escuchar_audio(self, filename='audiotest.wav'):
        try:
            import sounddevice as sd
            import soundfile as sf
            data, fs = sf.read(filename, dtype='float32')
            sd.play(data, fs)
            status = sd.wait()
            if status:
                parser.exit('Error during playback: ' + str(status))
        except KeyboardInterrupt:
            parser.exit('\nInterrupted by user')
        except Exception as e:
            parser.exit(type(e).__name__ + ': ' + str(e))

## Prueba de documento de voz desde archivo 1:

El texto leído es: Esto es una prueba básica para entender la funcionalidad de voz a texto

In [78]:
modelo_v2t = ModelVoice2Text(input_type='file', input_file='audiotest1.wav')
response = modelo_v2t.reconocer_desde_wav()
print('El texto detectado del microfono es: {response}'.format(response=response["transcription"]))

Escuchando audio audiotest1.wav...
El texto detectado del microfono es: prueba básica para entender la funcionalidad de voz a texto


## Prueba de documento de voz desde archivo 2:

El texto leído es: Lanús está al sur de la ciudad de Buenos Aires

In [82]:
modelo_v2t = ModelVoice2Text(input_type='file', input_file='audiotest2.wav')
response = modelo_v2t.reconocer_desde_wav()
print('El texto detectado del microfono es: {response}'.format(response=response["transcription"]))

Escuchando audio audiotest2.wav...
El texto detectado del microfono es: está al sur de la Ciudad de Buenos Aires


## Prueba de documento de voz desde archivo 3:

El texto leído es: La policía tiene un nuevo caso que atender en la comisaría 39

In [80]:
modelo_v2t = ModelVoice2Text(input_type='file', input_file='audiotest3.wav')
response = modelo_v2t.reconocer_desde_wav()
print('El texto detectado del microfono es: {response}'.format(response=response["transcription"]))

Escuchando audio audiotest3.wav...
El texto detectado del microfono es: La policía tiene un nuevo caso que atender en la comisaría 39


## Prueba de documento de voz desde archivo 4:

El texto leído es: Emergencia por choque entre el cruce de calles 25 de mayo y 2 de mayo

In [81]:
modelo_v2t = ModelVoice2Text(input_type='file', input_file='audiotest4.wav')
response = modelo_v2t.reconocer_desde_wav()
print('El texto detectado del microfono es: {response}'.format(response=response["transcription"]))

Escuchando audio audiotest4.wav...
El texto detectado del microfono es: emergencia por choque entre el cruce de calles 25 de mayo y 2 de mayo


## Grabar audio a archivo usando Speech Recognition

In [None]:
modelo_v2t = ModelVoice2Text(input_type='mic')
modelo_v2t.grabar_nuevo_audio(filename='audiotest33.wav')

## Escuchar audio desde archivo .wav

In [None]:
#modelo_v2t = ModelVoice2Text(input_type='file', input_file='audiotest1.wav')
#modelo_v2t.escuchar_audio(filename='audiotest1.wav')

## Reconocimiento de voz desde el micrófono a texto:

Puede hablar hasta que se detecte silencio

In [None]:
#modelo_v2t = ModelVoice2Text(input_type='mic')
#response = modelo_v2t.reconocer_desde_mic()
#print('El texto detectado del microfono es: {response}'.format(response=response["transcription"]))
#print(response)