In [None]:
"""Pre-processes chunks to be compliant with speech studio length."""
import os

from pydub import AudioSegment
from pydub.silence import split_on_silence

# Directory that receives one WAV file per detected utterance.
chunk_dir = 'alfred_utterances'
# The export below writes straight to the target path; create the directory
# up front so a fresh checkout does not fail on the first chunk.
os.makedirs(chunk_dir, exist_ok=True)

voice = AudioSegment.from_file('alfred.wav', format='wav')

# Split wherever the recording stays below the threshold for long enough.
# Per pydub's documentation: min_silence_len and keep_silence are in
# milliseconds, silence_thresh is in dBFS.
chunks = split_on_silence(voice, min_silence_len=1500, silence_thresh=-20, keep_silence=500)

for i, chunk in enumerate(chunks):
    chunk_name = '{0}/chunk{1}.wav'.format(chunk_dir, i)
    print("exporting", chunk_name)
    chunk.export(chunk_name, format='wav')

In [None]:
"""Leverage speech recognizer as a lightweight transcription API."""
import os
from shutil import copyfile

import azure.cognitiveservices.speech as speechsdk

# Read the credentials from the process environment. os.environ works both
# inside a notebook and as a plain script (unlike the IPython-only `%env`
# magic) and raises KeyError if a variable is missing.
speech_key = os.environ['KEY']
service_region = os.environ['REGION']
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

# Number of chunk files to process: chunk0.wav .. chunk910.wav.
chunks = 911

# Directory that collects the successfully transcribed utterances.
accepted_dir = 'batman_utterances_all'
# copyfile() does not create missing directories, so make sure it exists.
os.makedirs(accepted_dir, exist_ok=True)

for i in range(0, chunks):
    # Creates an audio configuration that points to an audio file.
    # NOTE(review): the splitting cell exports its chunks to
    # 'alfred_utterances/'; confirm 'utterances/' is the intended input here.
    filename = 'utterances/chunk{}.wav'.format(i)
    audio_input = speechsdk.audio.AudioConfig(filename=filename)

    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_input)

    # Starts speech recognition, and returns after a single utterance is recognized.
    result = speech_recognizer.recognize_once()

    # Checks result and saves files and transcript formatted for speech studio.
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        formatted_transcript_line = '{}\t {}'.format(i, result.text)
        print(formatted_transcript_line)
        copyfile(filename, '{}/{}.wav'.format(accepted_dir, i))
        # Append per chunk so progress survives an interrupted run; pin the
        # encoding because recognized text may contain non-ASCII characters.
        with open("transcript.txt", "a", encoding="utf-8") as a_file:
            a_file.write(formatted_transcript_line + "\n")
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print('No speech could be recognized: {}'.format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print('Speech Recognition canceled: {}'.format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print('Error details: {}'.format(cancellation_details.error_details))