In [None]:
# Install packages needed for imports below
# ! pip3 install wavio 
# ! pip3 install librosa
# ! pip3 install sounddevice
# had to install an earlier version of matplotlib in order to get the librosa waveshow function to work
# ! pip3 install matplotlib==3.7.3

In [11]:
# import necessary modules
import sounddevice as sd
from scipy.io.wavfile import write
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
import IPython.display as ipd
import numpy as np
import time 

In [13]:
# Capture settings used by the recording code below
sr = 44_100   # sampling rate (frequency) in Hz
duration = 5  # length of one recording in seconds

# Main function to record audio in mono and save to wav file
def record(path, sr):
    """Record a mono take, save it to `path` as a WAV file, and load it back.

    The take lasts `duration` seconds (module-level setting) and is captured
    at `sr` Hz on a single channel. The written file is then read with
    `librosa.load`; the sample array and sampling rate returned by that call
    are printed and returned as a `(samples, sampling_rate)` tuple.
    """
    n_frames = int(duration * sr)

    # Kick off the capture on one (mono) channel
    take = sd.rec(n_frames, samplerate=sr, channels=1)

    # Announce the take, then wait for the capture to finish
    print("Recording... " + path)
    sd.wait()

    # Persist the captured samples as a WAV file
    write(path, sr, take)

    # Read the file back as a discrete sample array.
    # NOTE(review): librosa.load is called without `sr`, so the rate returned
    # here is chosen by librosa and may differ from the `sr` argument --
    # confirm this is intended.
    samples, loaded_sr = librosa.load(path)
    print("recording shape", samples.shape)
    print("sampling rate", loaded_sr)
    return samples, loaded_sr

# generates time domain representation of audio array
def generate_time_plot(array, sr, plot_title):
    """Plot the waveform of `array` (sampled at `sr` Hz) on a new 14x5 figure.

    `plot_title` is prefixed to " Time Domain" to form the figure title.
    """
    fig = plt.figure(figsize=(14, 5))
    ax = fig.gca()
    ax.set_title(plot_title + " Time Domain")  # title built from the caller's label
    ax.set_xlim(0, 5)  # show the time frame from 0 to 5 seconds
    librosa.display.waveshow(array, sr=sr, ax=ax)
    # Axis labels are set after drawing so they are the ones that end up on the plot
    ax.set_xlabel("Time (seconds)")
    ax.set_ylabel("Amplitude")

# generates a spectrogram from the recorded audio array
def generate_spectrogram(array, plot_title, sr=22050):
    """Plot a dB-scaled spectrogram of an audio array on a new figure.

    Parameters
    ----------
    array : audio samples passed to `librosa.stft`.
    plot_title : str, prefixed to " Spectrogram" to form the figure title.
    sr : sampling rate of `array` in Hz, used to scale the time and frequency
        axes. Defaults to 22050 so existing two-argument calls keep working.
    """
    # Magnitude of the short-time Fourier transform, in dB relative to the peak
    magnitude_db = librosa.amplitude_to_db(np.abs(librosa.stft(array)), ref=np.max)
    print(magnitude_db.shape)

    fig, ax = plt.subplots()
    img = librosa.display.specshow(
        magnitude_db, sr=sr, x_axis='time', y_axis='linear', ax=ax
    )

    # Limit the plotted frequencies to 0-8000 Hz. This is applied after
    # drawing so the limit is not replaced when the spectrogram is drawn.
    ax.set_ylim(0, 8000)

    ax.set_title(plot_title + " Spectrogram")  # title built from the caller's label
    ax.set_xlabel("Time (seconds)")
    ax.set_ylabel("Frequency (Hertz)")
    fig.colorbar(img, ax=ax)

# Takes a mono audio array and duplicates it to produce a stereo array.
# The right channel is then delayed and/or attenuated based on the input parameters.
def delay_array(array, delay, delay_name, attenuation, attenuation_name, sr):
    """Write a stereo WAV file whose right channel is a delayed, scaled copy of `array`.

    The left channel is `array` unchanged. The right channel is `array`
    shifted later by `delay` samples (zeros in front, the same number of
    samples dropped from the end) and multiplied by `attenuation`.

    Parameters
    ----------
    array : mono audio samples; flattened to one dimension before use.
    delay : non-negative number of samples to delay the right channel by.
        A delay longer than the signal yields an all-zero right channel.
    delay_name : str, label for the delay used in the output file name.
    attenuation : multiplier applied to the right channel. When it equals 1
        the file name carries only `delay_name`.
    attenuation_name : str, label for the attenuation used in the file name
        when `attenuation` is not 1.
    sr : sampling rate written to the WAV file.

    Raises
    ------
    ValueError : if `delay` is negative.
    """
    mono = np.asarray(array).reshape(-1)

    if delay < 0:
        raise ValueError("delay must be a non-negative number of samples")

    # Clamp the shift to the signal length: a longer delay leaves nothing of
    # the signal in the right channel, and clamping keeps both channels the
    # same length so they can be stacked.
    shift = min(delay, mono.size)

    # drop the last `shift` samples and put the same number of zeros in front
    leading_silence = np.zeros(shift, dtype=mono.dtype)
    right = np.concatenate((leading_silence, mono[:mono.size - shift]))

    # combine the original (left) with the delayed, attenuated copy (right)
    stereo = np.hstack((mono.reshape(-1, 1), right.reshape(-1, 1) * attenuation))

    # if the right channel is only delayed, NOT attenuated, save with delay in file name
    if attenuation == 1:
        filename = "team[]-stereosoundfile-[" + delay_name + "].wav"
    # if the right channel is delayed AND attenuated, save with delay and attenuation in file name
    else:
        filename = "team[]-stereosoundfile-[" + delay_name + "]-[" + attenuation_name + "].wav"

    write(filename, sr, stereo)  # write stereo array to a wave file

# Function runs the delay_array function for each desired delay and attenuation value requested in the project
def generate_files(array, sr):
    """Write every delayed / attenuated stereo file requested in the project.

    Calls `delay_array` once per delay setting with no attenuation (five
    files), then once per attenuation level for each of the "0ms" and
    "avghead" delays (six files).

    Parameters
    ----------
    array : mono audio samples handed to `delay_array`.
    sr : sampling rate in Hz; used to convert delays in seconds to samples
        and passed on to `delay_array`.
    """
    # Per-team-member delay in samples.
    # NOTE(review): 0.202 and 0.194 look like head widths in metres and 343
    # like the speed of sound in m/s -- confirm.
    member_delays = [(0.202 / 343) * sr, (0.194 / 343) * sr]

    # average the team members' values to get the "avghead" delay
    delay_avg = sum(member_delays) / len(member_delays)

    # (file-name label, delay in samples) for each requested delay
    delays = [
        ("0ms", 0),
        ("avghead", round(delay_avg)),
        ("1ms", round(0.001 * sr)),
        ("10ms", round(0.01 * sr)),
        ("100ms", round(0.1 * sr)),
    ]

    # delay-only files: attenuation of 1 leaves the right channel level unchanged
    for delay_name, delay_samples in delays:
        delay_array(array, delay_samples, delay_name, 1, "", sr)

    # (file-name label, level change in dB) for each requested attenuation
    attenuations = [("-1.5dB", -1.5), ("-3dB", -3.0), ("-6dB", -6.0)]

    for attenuation_name, level_db in attenuations:
        # The factor multiplies sample amplitudes, so convert with
        # 10 ** (dB / 20). The previous 0.7 / 0.5 / 0.25 values are power
        # ratios; as amplitude multipliers they cut about twice as many dB as
        # the file names claim.
        gain = 10 ** (level_db / 20)

        # attenuated files are produced for the 0ms and avghead delays only
        for delay_name, delay_samples in delays[:2]:
            delay_array(array, delay_samples, delay_name, gain, attenuation_name, sr)


In [None]:
# Record the three phrases, one file per phrase
def _record_phrase(wav_path, pause_first=False):
    """Optionally pause 3 seconds, record one phrase to `wav_path`, and report completion."""
    if pause_first:
        time.sleep(3)  # pause 3 seconds between recordings
    take = record(wav_path, sr)
    print("Done!\n")
    return take

x1, sr1 = _record_phrase("quick_brown_fox.wav")
x2, sr2 = _record_phrase("antique_ivory_buckles.wav", pause_first=True)
x3, sr3 = _record_phrase("exquisite_opal_jewels.wav", pause_first=True)

In [None]:
# Play the audio
# Only the last bare expression of a cell is rendered automatically, so the
# players are handed to display() explicitly to show all three recordings.
ipd.display(
    ipd.Audio(x1, rate=sr1),  # load a NumPy array
    ipd.Audio(x2, rate=sr2),
    ipd.Audio(x3, rate=sr3),
)

In [None]:
# Time-domain plot for each of the three recordings
for samples, rate, title in (
    (x1, sr1, "Quick Brown Fox"),
    (x2, sr2, "Antique Ivory Buckles"),
    (x3, sr3, "Exquisite Opal Jewels"),
):
    generate_time_plot(samples, rate, title)

In [None]:
# Spectrogram for each of the three recordings
for samples, title in (
    (x1, "Quick Brown Fox"),
    (x2, "Antique Ivory Buckles"),
    (x3, "Exquisite Opal Jewels"),
):
    generate_spectrogram(samples, title)

In [14]:
# Build the delayed and attenuated wav files from one of the phrases.
# To work from an existing recording instead, uncomment the line below.
#x1, sr1 = librosa.load("quick_brown_fox.wav")
generate_files(array=x1, sr=sr1)