# Detect and Record Audio
You can detect and record audio with this Python code. The code is runnable wherever Python 3 is installed, including on the Jetson Nano 2GB.

To run this code you will need to set these global variables to match your environment:
- DIRECTORY
- INPUT_DEVICE_INDEX

### Import essential libraries

In [1]:
from sys import byteorder
from array import array
from struct import pack

import os
import shutil
import pyaudio
import wave
import IPython

### Declare global variables

In [2]:
# Sample format handed to PyAudio when the stream is opened; the recording
# code reads the captured bytes as signed shorts (array type code 'h').
FORMAT = pyaudio.paInt16
# Channel count, used both for the stream and for the WAV header.
CHANNELS = 1
# Sampling rate in samples per second, used for the stream, the WAV header,
# and for converting seconds of padding into a number of samples.
RATE = 44100
# Index of the capture device passed to PyAudio as input_device_index.
# Machine specific: adjust before running.
INPUT_DEVICE_INDEX = 11
# Frames per buffer, and the number of frames read from the stream at a time.
CHUNK = 4096
# File name prefix; a running counter and ".wav" are appended to it.
WAVE_OUTPUT_FILENAME = "audio_"
# Sample amplitude that separates "silence" from sound.
THRESHOLD = 3000
# Output directory. It is deleted and recreated when the recording cell runs.
# Machine specific: adjust before running.
DIRECTORY = "/workspace/jetbot/notebooks/speech_recording/data"

### is_silent(send_data)
Returns 'True' if below the 'silent' threshold

In [3]:
def is_silent(send_data, threshold=None):
    """Return True if every sample in ``send_data`` is below the threshold.

    Loudness is judged on the absolute sample value, so a chunk whose only
    loud samples are negative is still reported as sound. An empty chunk
    counts as silent.

    Args:
        send_data: Iterable of integer audio samples.
        threshold: Amplitude limit to compare against. Defaults to the
            module-level ``THRESHOLD`` when omitted.

    Returns:
        bool: True when the peak absolute amplitude is below the threshold.
    """
    if threshold is None:
        threshold = THRESHOLD
    # default=0 keeps max() from raising ValueError on an empty chunk.
    peak = max((abs(sample) for sample in send_data), default=0)
    return peak < threshold

### normalize(send_data)
Average the volume out

In [4]:
def normalize(send_data):
    """Scale the samples so that the loudest one has an amplitude of 16384.

    Every sample is multiplied by the same factor and truncated to an
    integer. Input that is empty or consists only of zeros has no peak to
    scale against and is returned unchanged (as a new array) instead of
    raising.

    Args:
        send_data: Re-iterable sequence of integer audio samples.

    Returns:
        array: A new ``array('h')`` holding the scaled samples.
    """
    MAXIMUM = 16384
    peak = max((abs(i) for i in send_data), default=0)
    if peak == 0:
        # Nothing to scale; dividing by the peak would raise
        # ZeroDivisionError.
        return array('h', send_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(i * times) for i in send_data))

### trim(send_data)
Trim the blank spots at the start and end

In [5]:
def trim(send_data):
    """Strip the quiet samples from both ends of ``send_data``.

    A sample is quiet when its absolute value does not exceed ``THRESHOLD``.
    Everything from the first loud sample through the last loud sample is
    kept, including quiet samples in between.

    Args:
        send_data: Iterable of integer audio samples.

    Returns:
        array: A new ``array('h')`` with the leading and trailing quiet
        samples removed; empty when no sample exceeds ``THRESHOLD``.
    """

    def _drop_quiet_prefix(samples):
        # Copy everything from the first loud sample onwards.
        kept = array('h')
        remaining = iter(samples)
        for sample in remaining:
            if abs(sample) > THRESHOLD:
                kept.append(sample)
                # The iterator now sits just past the first loud sample,
                # so the rest is taken as-is.
                kept.extend(remaining)
                break
        return kept

    # Leading silence first ...
    front_trimmed = _drop_quiet_prefix(send_data)

    # ... then trailing silence, by running the same pass over the reversed
    # samples and flipping the result back.
    front_trimmed.reverse()
    fully_trimmed = _drop_quiet_prefix(front_trimmed)
    fully_trimmed.reverse()
    return fully_trimmed

### add_silence(send_data, seconds)
Add silence to the start and end of 'send_data' of length 'seconds' (float)

In [6]:
def add_silence(send_data, seconds):
    """Pad ``send_data`` with zero samples at both ends.

    Args:
        send_data: Iterable of integer audio samples.
        seconds: Length of the padding on each side, in seconds (float);
            converted to a sample count with ``RATE``.

    Returns:
        array: A new ``array('h')`` made of padding, samples, padding.
    """
    pad_length = int(seconds * RATE)
    padding = array('h', [0]) * pad_length

    padded = padding[:]  # copy, so the same padding can be reused for the tail
    padded.extend(send_data)
    padded.extend(padding)
    return padded

### record()
Record a word or words from the microphone and return the data as an array of signed shorts.
Normalizes the audio, trims silence from the start and end, and pads with 0.5 seconds of blank sound to make sure VLC et al can play it without getting chopped off.

In [7]:
def record():
    """Record from the microphone until the speaker goes quiet.

    Chunks of ``CHUNK`` frames are read from the input device. Recording
    stops once sound has been detected and more than 30 silent chunks have
    been counted since then. The captured samples are normalized, trimmed
    of leading and trailing silence, and padded with 0.5 seconds of silence
    on each side.

    The stream and the PyAudio instance are released even when recording is
    aborted by an exception (for example a KeyboardInterrupt while waiting
    for input).

    Returns:
        tuple: ``(sample_width, samples)`` where ``sample_width`` is the
        size in bytes of one sample of ``FORMAT`` and ``samples`` is an
        ``array('h')`` of signed shorts.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, output=True,
                        input_device_index=INPUT_DEVICE_INDEX,
                        frames_per_buffer=CHUNK)
        try:
            num_silent = 0
            send_started = False

            r = array('h')

            while True:
                # little endian, signed short
                send_data = array('h', stream.read(CHUNK))
                if byteorder == 'big':
                    send_data.byteswap()
                r.extend(send_data)

                silent = is_silent(send_data)

                if silent and send_started:
                    num_silent += 1
                elif not silent and not send_started:
                    send_started = True

                if send_started and num_silent > 30:
                    break
        finally:
            # Always release the audio device, also on Ctrl-C.
            stream.stop_stream()
            stream.close()

        sample_width = p.get_sample_size(FORMAT)
    finally:
        p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r

### record_to_file(path)
Records from the microphone and outputs the resulting data to 'path'

In [8]:
def record_to_file(path):
    """Record from the microphone and write the result to ``path`` as WAV.

    Args:
        path: Destination file name; the file is opened for writing in
            binary mode by the ``wave`` module.
    """
    sample_width, data = record()
    # Little-endian signed shorts; the count prefix avoids building a format
    # string with one 'h' per sample.
    frames = pack('<%dh' % len(data), *data)

    # The context manager closes the file even if writing fails part-way.
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(frames)

### Initialize detect and record audio

In [9]:
# Start each session with an empty output directory: if DIRECTORY already
# exists it is removed together with its contents, then recreated.
if os.path.isdir(DIRECTORY):
    shutil.rmtree(DIRECTORY + '/')
# makedirs also creates missing parent directories, where mkdir would fail.
os.makedirs(DIRECTORY, exist_ok=True)

cnt = 1

# Record numbered clips one after another until interrupted from the keyboard.
try:
    while True:
        file_name = WAVE_OUTPUT_FILENAME + str(cnt) + ".wav"
        file_path = os.path.join(DIRECTORY, file_name)

        print("Please speak a word into the microphone")
        record_to_file(file_path)
        print("Done - reuslt written to " + file_name)
        # Show an inline audio player for the clip that was just written.
        IPython.display.display(IPython.display.Audio(file_path))
        cnt += 1
except KeyboardInterrupt:
    print("Done - keyboard interrupted")

Please speak a word into the microphone
Done - reuslt written to audio_1.wav


Please speak a word into the microphone
Done - keyboard interrupted
