In [1]:
import time
import wave
import azure.cognitiveservices.speech as speechsdk
import json
from pyspark.sql.types import *
from pyspark.sql import *
print(speechsdk.__version__)

# Schema of one recognized utterance.
# Offset and Duration are 64-bit: the Speech SDK reports both in 100-nanosecond
# ticks (see the SDK's RecognitionResult documentation), so a 32-bit IntegerType
# overflows after roughly 214 seconds of audio and PySpark's schema verification
# then rejects the row.
fields = [
    StructField("Id", StringType(), True),
    StructField("Offset", LongType(), True),
    StructField("Duration", LongType(), True),
    StructField("Text", StringType(), True),
]
schema = StructType(fields)



In [2]:
def speech_recognition_with_pull_stream(myFile, myId):
    """Transcribe a WAV file with continuous recognition fed by a pull audio stream.

    The file is handed to the Speech SDK through a custom
    ``PullAudioInputStreamCallback``; every ``recognized`` event becomes one row of
    the returned DataFrame.

    Relies on names defined elsewhere in the notebook: ``spark``, ``schema``,
    ``speech_key`` and ``service_region``.

    Args:
        myFile: Path of the WAV file to transcribe. Must be mono, 16-bit, 16 kHz,
            uncompressed.
        myId: Identifier stored in the ``Id`` column of every row.

    Returns:
        A Spark DataFrame built with ``schema``, one row per recognized utterance
        (empty if nothing was recognized). The same rows are also written to the
        ``temp_parq`` parquet directory, replacing its previous content.

    Raises:
        AssertionError: If the WAV file does not have the required format.
    """

    class WavFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
        """Pull-stream callback that serves audio frames read from a WAV file."""

        def __init__(self, filename: str):
            super().__init__()
            self._file_h = wave.open(filename, mode=None)
            try:
                self.sample_width = self._file_h.getsampwidth()
                assert self._file_h.getnchannels() == 1, "WAV file must be mono"
                assert self._file_h.getsampwidth() == 2, "WAV file must be 16-bit"
                assert self._file_h.getframerate() == 16000, "WAV file must be 16 kHz"
                assert self._file_h.getcomptype() == 'NONE', "WAV file must be uncompressed"
            except Exception:
                # Do not leak the file handle when the format check fails.
                self._file_h.close()
                raise

        def read(self, buffer: memoryview) -> int:
            """Fill ``buffer`` with the next frames; return the number of bytes written."""
            size = buffer.nbytes
            frames = self._file_h.readframes(size // self.sample_width)

            buffer[:len(frames)] = frames

            return len(frames)

        def close(self):
            """Close the underlying WAV file."""
            self._file_h.close()

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # Audio format announced to the service; it matches what the reader asserts.
    wave_format = speechsdk.audio.AudioStreamFormat(samples_per_second=16000, bits_per_sample=16,
            channels=1)

    # Set up the audio stream.
    callback = WavFileReaderCallback(myFile)
    stream = speechsdk.audio.PullAudioInputStream(callback, wave_format)
    audio_config = speechsdk.audio.AudioConfig(stream=stream)

    # Instantiate the speech recognizer with pull stream input.
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    print(myId)
    done = False

    # Rows are collected in plain Python and turned into a DataFrame once, after
    # recognition has finished, so no Spark job is launched from the SDK's
    # callback thread. Tuples follow the column order of `schema`:
    # (Id, Offset, Duration, Text).
    rows = []

    def stop_cb(evt):
        """Signal the waiting loop that recognition has ended (stopped or canceled)."""
        print('CLOSING on {}'.format(evt))
        # Only set the flag here; the recognizer is stopped from the waiting
        # thread below rather than from inside an SDK callback.
        nonlocal done
        done = True

    def recognized_cb(evt):
        """Record one final recognition result."""
        rows.append((myId, evt.result.offset, evt.result.duration, evt.result.text))

    # Connect callbacks to the events fired by the speech recognizer.
    speech_recognizer.recognized.connect(recognized_cb)

    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    # End the wait on either session stopped or canceled events.
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition and wait for it to finish.
    speech_recognizer.start_continuous_recognition()

    while not done:
        time.sleep(.5)

    speech_recognizer.stop_continuous_recognition()

    results = spark.createDataFrame(data=rows, schema=schema)
    # Kept for compatibility with anything that reads 'temp_parq' afterwards.
    results.write.mode('overwrite').parquet('temp_parq')
    # Return the in-memory frame rather than a lazy read of 'temp_parq', so a
    # later call that overwrites the directory cannot invalidate this result.
    return results
   
    