In [1]:
import pyaudio
import wave
import time
import struct
import speech_recognition as sr
# Report which speech_recognition release this notebook was run against.
print("speech_recognition version -", sr.__version__)

speech_recognition version - 3.6.3


### Functions for audio clipping, playing, and recording.

In [2]:
def clip16(x):
    """Saturate ``x`` to the signed 16-bit range and return it as an int.

    Values above 32767 become 32767 and values below -32768 become -32768;
    anything in between is converted with ``int()``, which truncates toward
    zero.
    """
    INT16_MAX = 32767
    INT16_MIN = -32768

    if x > INT16_MAX:
        return INT16_MAX
    if x < INT16_MIN:
        return INT16_MIN
    return int(x)

def play_wav(filename):
    """Play a WAV file on the default audio output and block until it ends.

    Playback uses PyAudio's callback mode: PyAudio repeatedly asks the
    callback for the next ``block_size`` frames, which are read straight from
    the wave file.

    Parameters
    ----------
    filename : str
        Path of the WAV file to play.

    Notes
    -----
    Relies on the ``wave``, ``pyaudio`` and ``time`` imports from the
    notebook's import cell. The wave file, the stream and the PyAudio
    instance are released even if playback raises.
    """
    # The wave file must stay open for as long as the callback can run, so
    # the whole playback happens inside this `with` block.
    with wave.open(filename, 'rb') as wf:
        CHANNELS = wf.getnchannels()
        RATE = wf.getframerate()
        WIDTH = wf.getsampwidth()

        def my_callback(input_string, block_size, time_info, status):
            # At end of file readframes() returns fewer frames than requested
            # (eventually none); PyAudio treats a short block as the end of
            # an output stream, which makes is_active() turn False below.
            output_string = wf.readframes(block_size)
            return (output_string, pyaudio.paContinue)

        p = pyaudio.PyAudio()
        try:
            stream = p.open(format = p.get_format_from_width(WIDTH),
                            channels = CHANNELS,
                            rate = RATE,
                            input = False,
                            output = True,
                            stream_callback = my_callback)
            try:
                print('* Playing audio file: ' + filename)
                stream.start_stream()

                # Playback runs on PyAudio's own thread; poll until it is done.
                while stream.is_active():
                    time.sleep(0.1)

                stream.stop_stream()
                print('Finished.')
            finally:
                stream.close()
        finally:
            p.terminate()
    
def record_wav(filename='test_01.wav', CHANNELS = 1, RATE = 16000, WIDTH = 2, GAIN = 1.0,
               DURATION = 10.0, BLOCK_SIZE = 1024):
    """Record audio from the default input device and save it as a WAV file.

    Parameters
    ----------
    filename : str
        Path of the WAV file to write.
    CHANNELS : int
        Number of input channels.
    RATE : int
        Sampling rate in frames per second.
    WIDTH : int
        Sample width in bytes. Only 2 (16-bit) is supported, because samples
        are decoded with struct format ``'h'`` and clipped with ``clip16``.
    GAIN : float
        Factor applied to every sample before clipping to 16 bits.
    DURATION : float
        Recording length in seconds (previously fixed at 10 seconds).
    BLOCK_SIZE : int
        Frames delivered to the callback per call. The previous value of 1
        invoked the Python callback once per sample.

    Raises
    ------
    ValueError
        If ``WIDTH`` is not 2.
    """
    if WIDTH != 2:
        # Previously this failed inside the callback thread (struct.error)
        # and left an empty file behind; fail early and visibly instead.
        raise ValueError('record_wav only supports 16-bit samples (WIDTH = 2)')

    # Created BEFORE the stream exists: the callback runs on PyAudio's thread
    # and appends to this list, so it must already be bound when audio arrives.
    output_list = []

    def my_callback_fun(binary_input_data, block_size, time_info, status):
        # Decode every sample in the block (all frames, all channels) rather
        # than assuming the block holds exactly one mono sample.
        sample_count = len(binary_input_data) // WIDTH
        input_tuple = struct.unpack('%dh' % sample_count,
                                    binary_input_data[:sample_count * WIDTH])
        output_list.extend(clip16(GAIN * sample) for sample in input_tuple)
        return (binary_input_data, pyaudio.paContinue)

    p = pyaudio.PyAudio()
    try:
        # Set PyAudio format
        PA_format = p.get_format_from_width(WIDTH)

        # start = False: PyAudio otherwise starts the stream as soon as it is
        # opened, i.e. before the "Start recording" message and the timer.
        stream = p.open(format = PA_format,
                        channels = CHANNELS,
                        rate = RATE,
                        input = True,
                        output = False,
                        frames_per_buffer = BLOCK_SIZE,
                        start = False,
                        stream_callback = my_callback_fun)
        try:
            stream.start_stream()
            print('* Start recording')

            # Recording happens in the callback; just wait for the duration.
            time.sleep(DURATION)

            stream.stop_stream()
            print('* Finish recording')
        finally:
            stream.close()
    finally:
        p.terminate()

    # Convert output signal to binary signal to write to wave file.
    # Native byte order on purpose: the wave module expects native-endian
    # frames and performs any byte swapping itself.
    output_string = struct.pack('%dh' % len(output_list), *output_list)

    # write data into wav file; the context manager closes it even on error
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(WIDTH)
        wf.setframerate(RATE)
        wf.writeframes(output_string)

    print('* Audio saved to file: ' + filename)

### Record an audio file

In [3]:
# WAV file that this cell records to; the later cells play back and
# transcribe the same file through this variable.
filename = 'test_01.wav'
# Record using record_wav's default settings and save the result to `filename`.
record_wav(filename)

* Start recording
* Finish recording
* Audio saved to file: test_01.wav


### Play the file

In [4]:
# Play the recording back; play_wav returns only after playback has finished.
play_wav(filename)

* Playing audio file: test_01.wav
Finished.


### Run Speech Recognition

In [10]:
r = sr.Recognizer()
test_file = sr.AudioFile(filename)

# Read the whole recording into a single audio object for the recognizer.
with test_file as source:
    audio = r.record(source)

# Time only the recognition call. perf_counter() is a monotonic clock meant
# for measuring intervals, unlike the wall clock returned by time.time().
# NOTE(review): per the SpeechRecognition docs, recognize_google raises
# sr.UnknownValueError / sr.RequestError on unintelligible audio or a failed
# request; those are deliberately left to propagate here.
start = time.perf_counter()
text = r.recognize_google(audio)
end = time.perf_counter()

print("Recognised speech : \n\n{}".format(text))
print("\nTime taken for speech recognition : {} seconds".format( round(end-start, 3) ))

# `with` closes the transcript even if writing fails; the explicit encoding
# keeps non-ASCII transcripts from depending on the platform's locale.
with open("test_01.txt", "w", encoding="utf-8") as f:
    print(text, file=f)

Recognised speech : 

this is a test audio 1 2 3 check

Time taken for speech recognition : 2.824 seconds


In [9]:
import sys

# `import` takes a module name, not a filesystem path, so `import ../Taco`
# is a SyntaxError. Make the parent directory importable instead.
# NOTE(review): assumes `Taco` is a module or package located in the parent
# of the notebook's working directory - confirm.
if '..' not in sys.path:
    sys.path.append('..')

import Taco