# Event-Detecting Audio Recorder

This notebook leverages the PyWaggle library and prior work from SageEdu to record and visualize audio detected automatically over a recording period. Only sounds that pass a loudness threshold are saved to file. After the notebook is finished recording, it plots waveform visualizations of the saved sounds and of the full recording period. 

To use this notebook, run all of the cells. The final cell produces a user interface for configuring and starting the recording.

In [None]:
from waggle.data.audio import Microphone
from waggle.data.audio import AudioFolder
from collections import deque
import matplotlib.pyplot as plt
import ipywidgets as widgets
import numpy as np
import soundfile
import time
from datetime import datetime
import os

### Primary record function

The recording process occurs in chunks called frames, which are defined by the user as being *f* seconds long. The notebook records one frame at a time, testing each new frame against the energy threshold to determine whether or not it contains an event. When an event is found, that frame is set aside until enough frames have been recorded to fill a buffer window around the frame. The event frame is then saved as a .wav file with *b* frames of buffer on either side of it. Note that, due to this buffer system, during the first and last *b* frames of the recording process, no events will be detected or saved.

After recording *n* frames, the notebook draws visualizations of both the full *n(f)*-second recording and each individually saved event. 

In [None]:
# record_events() # # # # # # #
#
# Record record_len frames from the microphone, save every frame whose peak
# exceeds the threshold (together with buffer_len frames of context on each
# side) as a numbered .wav file in path, then plot the full recording and
# each saved event.
#
# sr: sample rate in hz (used when writing wav files and for the time axis)
# threshold: energy cutoff point
# frame_len: length of a frame in seconds
# buffer_len: length of buffer in frames
# record_len: amount of frames to record
# path: path of the directory to write wav files to
# verbose: level of output to write
#
# # # # # # # # # # # # # # # #
def record_events(sr, threshold, frame_len, buffer_len, record_len, path, verbose):
    ### Pre-loop initialization
    # establish variables
    pending = deque()  # indices of event frames still waiting for their trailing buffer
    buffer = []  # every recorded frame, in recording order
    event_starts = []  # index of the first frame of each saved event
    record_elapse = 0
    save_count = 0
    wave_frames = 1 + (2 * buffer_len)  # length of a saved event in frames

    # initialize microphone
    # NOTE(review): the microphone is created with its defaults, while sr is
    # only used for writing files and for the time axis -- confirm they match.
    microphone = Microphone()

    # make sure the output directory exists, then clear old events from it
    os.makedirs(path, exist_ok=True)
    delete_events(path)

    ### Recording Loop
    print_info(1, verbose, "* Beginning recording cycle...")
    while record_elapse < record_len:

        # record frame and append to end of buffer
        frame = microphone.record(frame_len)
        buffer.append(frame)
        print_info(2, verbose, f"\n** Frame {record_elapse + 1}")

        # check if frame passes threshold
        # (only the positive peak is compared against the threshold)
        if np.max(frame.data) > threshold:
            # an event needs buffer_len recorded frames on both sides, so the
            # first and last buffer_len frames of the recording are skipped
            if buffer_len <= record_elapse < (record_len - buffer_len):
                pending.append(record_elapse)
                print_info(2, verbose, f"** Event detected on frame {record_elapse + 1}")

        # save every pending event whose trailing buffer is now complete;
        # this runs after detection so buffer_len == 0 saves immediately
        while pending and pending[0] + buffer_len <= record_elapse:
            start = pending.popleft() - buffer_len
            # the stop index is exclusive, so this covers exactly wave_frames frames
            wave = indeces_to_wave(buffer, start, start + wave_frames)
            soundfile.write(os.path.join(path, f"{save_count}.wav"), wave, sr)
            print_info(3, verbose, f"*** Saved event {save_count}")
            event_starts.append(start)
            save_count += 1

        # increment loop counter
        record_elapse += 1

    ### Post-loop prints
    print_info(1, verbose, f"\n* Done recording. Events saved: {save_count}")
    print_info(1, verbose, f"Files saved to {path}:")
    print_events(path)

    ### Analysis
    full_wave = frames_to_wave(buffer)
    # build the time axis from the sample count so its length always matches
    # the waveform (np.arange with a float step can be off by one element)
    time_axis = np.arange(len(full_wave)) / sr
    xticks = np.arange(0, frame_len * record_len, frame_len)

    # full buffer analysis
    plt.figure(figsize=(17, 5))
    plt.axes(xlim=(0, frame_len * record_len), xticks=xticks, ylim=(-1, 1))
    plt.title("Full Recording")
    plt.xlabel("Time [s]")
    plt.plot(time_axis, full_wave)
    plt.axhline(0, color='k')
    plt.axhline(threshold, color='r')  # plot threshold line
    plt.grid(axis='x')
    plt.tight_layout()
    plt.show()

    # event analysis
    # NOTE(review): this assumes AudioFolder yields the files in the order they
    # were saved (0.wav, 1.wav, ...) -- confirm for ten or more events.
    dataset = AudioFolder(path)

    for cnt, sample in enumerate(dataset):
        event_time = np.arange(len(sample.data)) / sample.samplerate
        # shift the axis so the event is placed where it occurred in the recording
        event_time = event_time + (event_starts[cnt] * frame_len)

        plt.figure(cnt, figsize=(17, 5))
        plt.axes(xticks=xticks, ylim=(-1, 1))
        plt.title(f"Event {cnt}")
        plt.xlabel("Time [s]")
        plt.plot(event_time, sample.data)  # plot event
        plt.axhline(0, color='k')
        plt.axhline(threshold, color='r')  # plot threshold line
        plt.grid(axis='x')
        plt.tight_layout()
        plt.show()

### Helper functions

Various subroutines to assist in processing and writing audio files, etc.

In [None]:
# Given a list of frames, return a single concatenated np array of the wave.
# Each frame's data is flattened before joining, and an empty list yields an
# empty float array.
# buffer - a list of frames, each exposing its samples as a .data array
def frames_to_wave(buffer):
    # Join everything in one call instead of growing the array frame by frame,
    # which would re-copy the accumulated wave on every iteration. The leading
    # empty float array keeps the result dtype and the empty-buffer result the
    # same as appending onto np.array([]).
    pieces = [np.array([])]
    pieces.extend(np.ravel(frame.data) for frame in buffer)
    return np.concatenate(pieces)

In [None]:
# Concatenate a contiguous run of frames into one wave by calling
# frames_to_wave on a slice of the given list of frames.
# buffer - a list of frames
# start - index of the first frame to include
# stop - index one past the last frame to include (exclusive, as in a slice)
def indeces_to_wave(buffer, start, stop):
    window = buffer[slice(start, stop)]
    return frames_to_wave(window)

In [None]:
# Print a message only when the user's verbosity level is at least as high as
# the level the message was tagged with.
# mode - the verbosity level of the given message
# verbose - the user verbosity level (anything int() accepts, e.g. '2')
# message - the message to print
def print_info(mode, verbose, message):
    user_level = int(verbose)
    if mode > user_level:
        return
    print(message)

In [None]:
# Remove every entry whose name ends in ".wav" from the given directory.
# Entries with any other name are left untouched.
# path - the directory to search
def delete_events(path):
    wav_names = [entry for entry in os.listdir(path) if entry.endswith(".wav")]
    for name in wav_names:
        os.unlink(f"{path}/{name}")

In [None]:
# List the given directory and print, one per line, the name of every entry
# that ends in ".wav".
# path - the directory to search
def print_events(path):
    wav_names = [entry for entry in os.listdir(path) if entry.endswith(".wav")]
    for name in wav_names:
        print(f"{name}")

### Record and plot events

Running the cell below will generate an interface you can use to adjust the recording settings. Here is what each of the controls means:

- **Sample rate** - Sets the sample rate of the recording in Hz. Default is 48000Hz.
- **Threshold** - Sets the energy magnitude at which a sound is recognized as an event. Default is 0.20, and can be adjusted between 0.00 and 1.00.
- **Frame length** - Defines the length of a single frame in *seconds*. Default is 5 seconds.
- **Buffer size** - Defines the length of the buffer in *frames*. For example, if the buffer is set to 1, then when a frame is found to contain an event, the exported file will include that frame as well as one frame immediately before and immediately after it. Default is 1 frame.
- **Record size** - Defines how many *frames* should be recorded in total. Default is 5 frames.
- **Folder path** - Determines the directory to save .wav files to. Defaults to ``audio_files/``.
- **Verbosity** - Determines the amount of system text to output during the recording process. Verbosity 0 will generate no text output; verbosity 1 will generate start and stop messages; and verbosity 2 and 3 will generate more detailed output about how the recording process is running.

Once you have adjusted the settings to your liking, click the **Run Interact** button to begin recording. Once the recording is finished, it will list the names of the audio files saved during the process, and then it will draw waveform plots of the full recording and each of the saved events.

In [None]:
# Build the settings form. record_events is called with the chosen values when
# the "Run Interact" button is clicked.
interact_ui = widgets.interact_manual(
    record_events,
    # a sample rate of 0 Hz is invalid (the time axis divides by it), so the
    # lower bound is 1
    sr=widgets.BoundedIntText(value=48000, min=1, max=100000, step=100, description='Sample rate'),
    threshold=widgets.FloatSlider(value=0.2, min=0, max=1.0, step=0.01, description='Threshold',
                                  continuous_update=False, readout_format='.2f'),
    frame_len=widgets.BoundedIntText(value=5, min=1, max=7200, step=1, description='Frame length'),
    buffer_len=widgets.BoundedIntText(value=1, min=0, max=100, step=1, description='Buffer size'),
    record_len=widgets.BoundedIntText(value=5, min=0, step=1, description='Record size'),
    path=widgets.Text(value='audio_files/', description='Folder path:'),
    # the dropdown yields a string; print_info converts it with int()
    verbose=widgets.Dropdown(options=['0', '1', '2', '3'], value='1', description='Verbosity:'),
)