# Recording Audio from Computer


In [4]:
from sys import byteorder
from array import array
from struct import pack
import time

import pyaudio
import wave

# need to add ability to start and stop at will
# need to change the wait length and otherwise capture longer sounds

## audio capture code from : https://stackoverflow.com/questions/892199/detect-record-audio-in-python
# Sample amplitude (signed 16-bit units) that is_silent() and trim() compare against.
THRESHOLD = 500
# Number of frames requested per stream.read() call; also passed as frames_per_buffer.
CHUNK_SIZE = 1024
# PyAudio sample format handed to p.open(); the bytes read are unpacked as array('h').
FORMAT = pyaudio.paInt16
# Sampling rate in Hz, used both for the input stream and for the WAV header.
RATE = 44100

# from https://github.com/jonfroehlich/CSE599Sp2019/blob/master/Python/gesture_rec.py
# segmentation stuff


def is_silent(snd_data):
    """Return True if every sample in 'snd_data' is below the 'silent' threshold.

    The check is made on the magnitude of the samples, so a chunk whose
    loud peaks are all negative is not mistaken for silence (trim() also
    compares abs(sample) against THRESHOLD). An empty chunk counts as
    silent instead of raising.
    """
    if len(snd_data) == 0:
        return True
    # max/min run at C speed; together they bound abs(sample) for the chunk.
    return max(snd_data) < THRESHOLD and min(snd_data) > -THRESHOLD

def normalize(snd_data):
    """Scale 'snd_data' so that its loudest sample has magnitude 16384.

    Returns a new array('h'); the input is not modified. Input that is
    empty or contains only zeros has no peak to scale to, so it is
    returned as an equally long array of zeros instead of raising
    ValueError / ZeroDivisionError.
    """
    MAXIMUM = 16384
    peak = max((abs(sample) for sample in snd_data), default=0)
    if peak == 0:
        # Nothing to scale: every sample (if any) is already zero.
        return array('h', [0]) * len(snd_data)

    times = float(MAXIMUM) / peak
    return array('h', (int(sample * times) for sample in snd_data))

def trim(snd_data):
    """Return the part of 'snd_data' between its first and last loud sample.

    A sample is loud when its magnitude exceeds THRESHOLD. The result is a
    new array('h'); it is empty when no sample is loud.
    """
    # Index of the first loud sample, scanning from the left.
    first_loud = next(
        (pos for pos, sample in enumerate(snd_data) if abs(sample) > THRESHOLD),
        None,
    )
    if first_loud is None:
        return array('h')

    # Index of the last loud sample, scanning from the right; the scan
    # always succeeds because position 'first_loud' itself is loud.
    last_loud = next(
        pos
        for pos in range(len(snd_data) - 1, first_loud - 1, -1)
        if abs(snd_data[pos]) > THRESHOLD
    )
    return array('h', snd_data[first_loud:last_loud + 1])

def add_silence(snd_data, seconds):
    """Return 'snd_data' padded on both ends with 'seconds' (float) of zero samples.

    The padding length is int(seconds * RATE) samples per side. A new
    array('h') is returned; the input is left untouched.
    """
    pad_len = int(seconds * RATE)
    padding = array('h', [0]) * pad_len

    padded = padding[:]
    padded.extend(snd_data)
    padded.extend(padding)
    return padded




def record():
    """
    Record a word or words from the microphone and
    return (sample_width, data), where data is an array of signed shorts.

    Recording keeps every chunk read. It stops once sound has been
    detected and more than 30 consecutive chunks were silent afterwards.
    The result has silence trimmed from the start and end and is padded
    with 0.5 seconds of blank sound to make sure VLC et al can play it
    without getting chopped off. (Normalization is currently disabled.)

    The stream and the PyAudio instance are released even if reading fails.
    """
    p = pyaudio.PyAudio()
    try:
        # NOTE(review): output=True is kept from the original although
        # nothing is ever written to the stream -- confirm it is needed.
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
            input=True, output=True,
            frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            r = array('h')

            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                if not is_silent(snd_data):
                    # Any loud chunk (re)starts the silence countdown.
                    snd_started = True
                    num_silent = 0
                elif snd_started:
                    num_silent += 1

                if snd_started and num_silent > 30:
                    break

            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    #r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r

# different capture types

# start: manual
# stop: set time
def rec_man_time(duration = 4.0): # time in seconds
    """Record immediately for 'duration' seconds and return (sample_width, data).

    'data' is an array of signed shorts holding every chunk that was read.
    The clock starts only after the input stream is open, so the time spent
    initialising the audio device no longer shortens the recording. The
    stream and the PyAudio instance are released even if reading fails.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
            input=True, output=True,
            frames_per_buffer=CHUNK_SIZE)
        try:
            r = array('h')
            print("recording for ", duration)
            # Monotonic clock: elapsed time is unaffected by wall-clock changes.
            start_time = time.monotonic()
            while time.monotonic() - start_time < duration:
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)
            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    return sample_width, r

# start: manual
# stop: threshold
def rec_man_thres():
    """Record immediately and stop after a run of silence; return (sample_width, data).

    Every chunk read is appended to 'data' (an array of signed shorts).
    Recording ends once more than 30 consecutive chunks are silent, which is
    roughly 0.7 s with the default CHUNK_SIZE and RATE. The stream and the
    PyAudio instance are released even if reading fails.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
            input=True, output=True,
            frames_per_buffer=CHUNK_SIZE)
        try:
            r = array('h')
            num_silent = 0
            print("recording")
            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                # Recording has always started here, so only the length of
                # the current silent run matters.
                if is_silent(snd_data):
                    num_silent += 1
                else:
                    num_silent = 0
                if num_silent > 30:
                    print("stop recording: silence threshold")
                    break
            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    return sample_width, r
    

# start: threshold
# stop: set time
def rec_thres_time(duration=4.0):
    """Wait for a non-silent chunk, then record for 'duration' seconds.

    Listening uses its own input stream, which is closed (together with its
    PyAudio instance) before rec_man_time() opens the recording stream; the
    original left both open on every call. The chunk that triggered the
    recording is not part of the returned data.

    Returns whatever rec_man_time(duration) returns: (sample_width, data).
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
            input=True, output=True,
            frames_per_buffer=CHUNK_SIZE)
        try:
            print("listening")
            silent = True
            while silent:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                silent = is_silent(snd_data)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    #not silent
    print("recording")
    return rec_man_time(duration)
            
            

# start: threshold
# stop: threshold
def rec_thres_thres():
    """Start on the first non-silent chunk, stop after a run of silence.

    Returns (sample_width, data) with data as an array of signed shorts.
    Every chunk read is kept, including the silent ones heard while still
    listening, and no trimming is applied. Recording ends once sound has
    started and more than 30 consecutive chunks were silent. The stream and
    the PyAudio instance are released even if reading fails.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
            input=True, output=True,
            frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False

            r = array('h')
            print("listening")
            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                silent = is_silent(snd_data)
                if silent and snd_started:
                    num_silent += 1
                elif not silent and snd_started:
                    num_silent = 0
                elif not silent:
                    # First loud chunk: switch from listening to recording.
                    print("recording")
                    snd_started = True
                if snd_started and num_silent > 30:
                    print("stop: silent too long")
                    break

            sample_width = p.get_sample_size(FORMAT)
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    return sample_width, r

## HARD b/c input is blocking
# start: manual
# stop: manual

# start: threshold
# stop: manual



def record_to_file(path, capture_method):
    """Records from the microphone and outputs the resulting data to 'path'.

    'capture_method' selects how the recording starts and stops:
        "tt" -- threshold start, threshold stop
        "mt" -- manual start, threshold stop
        "mi" -- manual start, fixed duration
        "ti" -- threshold start, fixed duration

    The samples are written as a mono, little-endian 16-bit WAV file at
    RATE Hz.

    Raises ValueError for any other 'capture_method'. The check happens
    before anything is recorded or written, so a typo no longer surfaces as
    an UnboundLocalError.
    """
    if capture_method == "tt":
        sample_width, data = rec_thres_thres()#record()
    elif capture_method == "mt":
        sample_width, data = rec_man_thres()
    elif capture_method == "mi":
        sample_width, data = rec_man_time()
    elif capture_method == "ti":
        sample_width, data = rec_thres_time()
    else:
        raise ValueError("unknown capture method: %r" % (capture_method,))

    frames = pack('<' + ('h'*len(data)), *data)

    # The context manager closes the file even if writing fails.
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(frames)
    

##### Annie Code for iterating over data collection
# Map each shortcut to a Sound holding its name, its shortcut, and its count of recordings so far.
class Sound:
    """A sound category that can be recorded.

    Attributes:
        name: label used as the prefix of the output file name.
        shortcut: short string the user types to select this sound.
        count: index given to the next recording of this sound.
    """
    def __init__(self, name, shortcut, count=0):
        self.name = name
        self.shortcut = shortcut
        # Honour the caller's starting index (it was previously forced to 0).
        self.count = count

# Registry of recordable sounds, keyed by the shortcut typed at the prompt.
# The order of the entries is the order in which they are listed to the user.
sounds = {
    entry.shortcut: entry
    for entry in (
        Sound("MicrowaveEnding", "me"),
        Sound("MicrowaveDoorOpen", "mo"),
        Sound("MicrowaveDoorClose", "mc"),
        Sound("Sizzle", "s"),
        Sound("Boiling", "b"),
        Sound("Toaster", "t"),
        Sound("FireAlarm", "f"),
        Sound("OvenTimer", "o"),
        Sound("GarbageDisposal", "g", 1),
        Sound("InstantPotBeep", "i"),
        Sound("FridgeDoor", "fr"),
        Sound("CoffeeGrinder", "c"),
    )
}

# prompt for ground truth label    
def capture_audio(capture_method):
    """Repeatedly ask which sound is being recorded and capture it to a file.

    Each recording is written to "<name>_<count>_captured.wav" using
    record_to_file(outfile, capture_method). Entering "x" ends the loop.
    An unrecognised shortcut is reported and the prompt is shown again,
    instead of ending the session with a KeyError.
    """
    while True:
        options = "".join(
            sound.shortcut + ": " + sound.name + "; " for sound in sounds.values()
        )
        prompt = "What sound?\n" + options + "\n x: Exit\n"
        soundNameID = input(prompt).strip()
        if soundNameID == "x":
            return

        sound = sounds.get(soundNameID)
        if sound is None:
            print("unknown sound: " + soundNameID)
            continue

        outfile = sound.name+"_"+str(sound.count)+"_captured.wav"
        record_to_file(outfile, capture_method)
        # Advance the index only once the file has actually been written.
        sound.count += 1
        print("saved to: "+outfile)
        
    
if __name__ == '__main__':
    # Ask once for the start/stop strategy, then hand over to the
    # interactive labelling loop.
    prompt = (
        "what method: \n"
        "tt: threshold start/stop, \n"
        "mt: manual start/threshold stop, \n"
        "mi: manual start/timeout \n"
        "ti threshold start/timeout stop\n"
    )
    capture_method = input(prompt)
    capture_audio(capture_method)

what method: 
tt: threshold start/stop, 
mt: manual start/threshold stop, 
mi: manual start/timeout 
ti threshold start/timeout stop
mt
What sound?
me: MicrowaveEnding; mo: MicrowaveDoorOpen; mc: MicrowaveDoorClose; s: Sizzle; b: Boiling; t: Toaster; f: FireAlarm; o: OvenTimer; g: GarbageDisposal; i: InstantPotBeep; fr: FridgeDoor; c: CoffeeGrinder; 
 x: Exit
i
recording
silence threshold: stop recording
saved to: InstantPotBeep_0_captured.wav
What sound?
me: MicrowaveEnding; mo: MicrowaveDoorOpen; mc: MicrowaveDoorClose; s: Sizzle; b: Boiling; t: Toaster; f: FireAlarm; o: OvenTimer; g: GarbageDisposal; i: InstantPotBeep; fr: FridgeDoor; c: CoffeeGrinder; 
 x: Exit
x


# Sandbox

In [2]:
# https://stackoverflow.com/questions/892199/detect-record-audio-in-python
import pyaudio
import wave


# http://people.csail.mit.edu/hubert/pyaudio/
# Fixed-length capture: RECORD_SECONDS of CHANNELS-channel 16-bit audio
# written to WAVE_OUTPUT_FILENAME.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
try:
    # Ask for the sample width while the PyAudio instance is still alive,
    # rather than after terminate().
    sample_width = p.get_sample_size(FORMAT)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        print("* recording")

        frames = []

        # Number of whole CHUNK-sized reads that fit into RECORD_SECONDS.
        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            frames.append(data)

        print("* done recording")
    finally:
        # Release the device even if a read fails part-way through.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# The context manager closes the file even if writing fails.
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

* recording
* done recording


   # play with messaging 



In [14]:
import serial 
#import serial

# Open the serial connection when this cell runs. The device path is
# specific to one machine.
# NOTE(review): 9600 is presumably the baud rate the board's sketch uses -- confirm.
# NOTE(review): the port is never closed in this cell.
arduinoData = serial.Serial('/dev/cu.usbmodem1411',9600)
def led_on():
    """Send the single byte b'1' over the serial connection.

    Presumably the receiving sketch switches its LED on for this byte --
    confirm against the sketch. Not called anywhere in this cell.
    """
    arduinoData.write(b'1')
    
def led_off():
    """Send the single byte b'0' over the serial connection.

    Presumably the receiving sketch switches its LED off for this byte --
    confirm against the sketch.
    """
    arduinoData.write(b'0')
    
# Busy-wait by counting to 4,000,000 before sending the "off" byte. The
# length of this pause depends on how fast the interpreter runs.
# NOTE(review): looks like a crude delay after opening the port; if so,
# time.sleep() would give a predictable pause -- confirm the intent.
t =0
while(t<4000000):
    t+=1
led_off()
print("done")
    

done


# play with classification


In [None]:
#https://docs.python.org/3/library/wave.html
import wave

# Open a captured sample for reading ('rb' returns a reader object).
# NOTE(review): neither reader is closed in this cell.
wave_1 = wave.open("./SoundSamples/CoffeeGrinder_0_captured.wav", 'rb')

# NOTE(review): this opens the same file as wave_1; presumably a second,
# different sample was intended for comparison -- confirm.
wave_2 = wave.open("./SoundSamples/CoffeeGrinder_0_captured.wav", 'rb')

# segmentation


In [None]:
# Module-level state shared by segment_event() and rec().
# NOTE(review): deque is not imported anywhere in the visible part of this
# file (it lives in collections); this cell fails without that import.

# Minimum number of buffered samples before segment_event() analyses the window.
window_length = 30
# segment_event() trims the buffer to window_length - window_step samples after
# each analysis, and uses window_step as the slice length when an event continues.
window_step = 10
# Sliding window of samples; rec() extends it with every chunk it reads.
window_buffer = deque()
current_event = None #tuple (time list, val list)
# NOTE(review): this name shadows the `time` module imported at the top of the
# file, so time.time() in rec_man_time() would fail after this cell has run.
# Nothing visible in this file ever appends to this deque.
time = deque()

def segment_event():
    """Inspect the sliding window for the start, continuation or end of an event.

    Returns None, or -- on the call where an event finishes -- a dict with
    the keys 'time' and 'signal' taken from the event collected so far.

    NOTE(review): this looks like a half-finished port of a class method to
    a module-level function; as written it cannot run:
      * `self` is not defined here, so every `self.` access raises NameError.
      * `current_event` is assigned in this function without a `global`
        declaration, so Python treats it as a local and the first read of
        it raises UnboundLocalError.
      * `np` and `itertools` are not imported in the visible part of the file.
      * no `mag` sequence is visible at module level for `self.mag` to map to.
    """
    segment_result = None
    if len(window_buffer) >= window_length:
        # you may need/want to change these tolerances
        min_max_begin_segment_threshold = 90 
        min_max_continue_segment_threshold = 25 #lower threshold for continuing event
        min_event_length_ms = 600

        # analyze the buffer: peak-to-peak spread of the samples in the window
        s = np.array(window_buffer)
        min_max_diff = abs(np.max(s) - np.min(s))

        if min_max_diff > min_max_begin_segment_threshold and current_event is None:
            print("begin segment!", min_max_diff)

            # The last window_length entries seed the new event.
            start_idx = len(self.time) - self.window_length
            end_idx = len(self.time)

            # NOTE(review): mixes the module-level `time` with `self.time` /
            # `self.mag`; islice also rejects a negative start index, which
            # is what an empty `time` deque would produce here.
            t = list(itertools.islice(time, start_idx, end_idx))
            s = list(itertools.islice(self.mag, start_idx, end_idx))

            current_event = (t, s)
        elif current_event is not None:
            # we are in the middle or end of a potential event
            if min_max_diff >= min_max_continue_segment_threshold: 
                print("continue segment", min_max_diff)

                # Only the newest window_step entries are appended.
                start_idx = len(time) - window_step
                end_idx = len(time)

                # NOTE(review): same `self.mag` / `self.current_event` problem as above.
                t = list(itertools.islice(time, start_idx, end_idx))
                s = list(itertools.islice(self.mag, start_idx, end_idx))

                self.current_event[0].extend(t)
                self.current_event[1].extend(s)
            elif min_max_diff < min_max_continue_segment_threshold:
                print("finish segment", min_max_diff)
                event_time = current_event[0]
                # Treated as milliseconds by the comparison below; the unit of
                # the stored timestamps is not established in this file.
                event_length_ms = event_time[-1] - event_time[0]
                if event_length_ms > min_event_length_ms:
                    # TODO valid event
                    # `valid` is never read afterwards.
                    valid = True
                else:
                    print("discarded event for being too short")

                # NOTE(review): the result is built even when the event was
                # just reported as discarded -- confirm that this is intended.
                segment_result = {'time' : self.current_event[0],
                                  'signal' : self.current_event[1] }

                current_event = None # clear events

        # Keep only the newest window_length - window_step samples.
        new_length = window_length - window_step
        while len(window_buffer) > new_length:
            window_buffer.popleft()

    return segment_result

def rec():
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=1, rate=RATE,
        input=True, output=True,
        frames_per_buffer=CHUNK_SIZE)

    num_silent = 0
    snd_started = False
    
     while 1:
        # little endian, signed short
        snd_data = array('h', stream.read(CHUNK_SIZE))
        if byteorder == 'big':
            snd_data.byteswap()
        window_buffer.extend(snd_data)
        segment_result = segment_event()
        if segment_result != None:
            print("result: ",segment_result)
            #cls_result = self.classify_event(segment_result)