# Example Functionality

Here is a collection of functionality to use as a reference for Part 1 of the workshop.

These are not necessarily the best or only ways to achieve the desired output; they are meant simply as a reference.

## PROGRAMMING CHALLENGE: Get length of audio in seconds

In [1]:
def get_length_sec(samples, sampling_rate):
    """Return the duration of the audio in seconds.

    The duration is the number of entries in `samples` (its `len`) divided
    by `sampling_rate`.
    """
    return len(samples) / sampling_rate

## PROGRAMMING CHALLENGE: Resample Audio

In [2]:
def resample_audio(samples, sr_old, sr_new):
    """Resample audio from the sampling rate `sr_old` to `sr_new`.

    The input is copied first, so `samples` itself is not modified. The
    number of output samples is the input length scaled by
    `sr_new / sr_old`, truncated to an integer.

    `resample` must already be in scope; its call signature here
    (data, number of samples) matches `scipy.signal.resample` -- confirm
    against the notebook's imports.

    Returns:
        A tuple `(resampled_data, sr_new)`.
    """
    data = samples.copy()
    # Multiply before dividing. Computing the duration first
    # (len / sr_old) and then scaling it can land just below the exact
    # integer result (e.g. 57 / 100 * 100 == 56.99999999999999), and int()
    # would then silently drop one sample.
    num_samples = int(len(data) * sr_new / sr_old)
    data = resample(data, num_samples)
    return data, sr_new

## PROGRAMMING CHALLENGE: Experiment with the window size

Speech: window lengths usually range from 16 ms to 25 ms.

In [None]:
# Analysis window length in milliseconds; passed below as `win_size_ms`
# and shown in the plot title.
fft_window_length = 25  # <------ EXPERIMENT WITH THIS VALUE. 

# Compute STFT features for `speech` with the window length chosen above.
# NOTE(review): `fft_bins` is taken from `sr_bird2`, a name that does not
# otherwise relate to the speech signal in this cell -- confirm this is
# intended and not a leftover from a bird-audio example.
stft_speech = sp.feats.get_stft(
    speech, 
    sr=speech_sampling_rate, 
    win_size_ms=fft_window_length,
    fft_bins=sr_bird2,
)
# Plot the features, labelling the figure with the window length used.
sp.feats.plot(
    stft_speech, 
    feature_type="stft", 
    title=f"STFT window length: {fft_window_length} ms")

Background Noise: windows are longer (length varies; check out some [research papers via Google scholar!](https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=noise+classification+machine+learning&btnG=&oq=noise+classification+m))

In [None]:
# Analysis window length in milliseconds; passed below as `win_size_ms`
# and shown in the plot title.
fft_window_length = 100  # <------ EXPERIMENT WITH THIS VALUE. 

# Compute STFT features with the longer window chosen above.
# NOTE(review): the surrounding text introduces background noise, but this
# cell still analyzes `speech` / `speech_sampling_rate` and stores the
# result in `stft_speech` -- confirm whether a noise signal was intended.
# NOTE(review): `fft_bins` is taken from `sr_bird2`, a name that does not
# otherwise relate to the signal in this cell -- confirm this is intended.
stft_speech = sp.feats.get_stft(
    speech, 
    sr=speech_sampling_rate, 
    win_size_ms=fft_window_length,
    fft_bins=sr_bird2,
)
# Plot the features, labelling the figure with the window length used.
sp.feats.plot(
    stft_speech, 
    feature_type="stft", 
    title=f"STFT window length: {fft_window_length} ms")

## PROGRAMMING CHALLENGE: Manipulate the sound

In [5]:
def adjust_volume(samples, amount):
    """Return a copy of `samples` multiplied by `amount`.

    The multiplication is applied to a copy, so the caller's `samples`
    object is left untouched.
    """
    return samples.copy() * amount

In [3]:
def mirror_sound(samples):
    """Return `samples` followed by a time-reversed copy of itself.

    The last sample is left out of the reversed part so that it is not
    duplicated at the turning point: [1, 2, 3] -> [1, 2, 3, 2, 1].

    Only the first axis is reversed. For data shaped
    (num_samples, num_channels) this reverses the order of the rows while
    keeping the values within each row (the channels) in place.
    """
    # axis=0 is essential: np.flip without an axis reverses *every* axis,
    # which would also swap the channel columns of multi-channel audio.
    reversed_part = np.flip(samples[:-1], axis=0)
    mirrored_samples = np.concatenate([samples, reversed_part])
    return mirrored_samples

In [None]:
def addsounds(samples1, samples2):
    """Combine two signals with `+` after trimming to the shorter length.

    Both inputs are sliced to the length of the shorter one before being
    added, so the result is as long as the shorter input. For NumPy arrays
    `+` is element-wise addition.
    """
    shared_length = min(len(samples1), len(samples2))
    return samples1[:shared_length] + samples2[:shared_length]

In [4]:
def repeat_audio(samples, repeat_n_times = 2):
    """Repeat mono audio back-to-back `repeat_n_times` times.

    Accepts 1-D data of shape (num_samples,) or single-channel 2-D data of
    shape (num_samples, 1). The input is never modified.

    Returns:
        A new 1-D float64 array of length num_samples * repeat_n_times.

    Raises:
        ValueError: if the data does not look like
            (num_samples, num_channels), or has more than one channel.
    """
    orig_shape = samples.shape
    # The longest axis is taken to be the sample axis and the shortest the
    # channel axis; for 1-D input both indices are 0 and the checks pass.
    index_data = np.argmax(orig_shape)
    index_channel = np.argmin(orig_shape)
    if index_channel != 1 and index_channel != index_data:
        raise ValueError("Audio should be shape: (num_samples, num_channels).")
    if orig_shape[index_channel] > 1 and index_channel != index_data:
        raise ValueError("Audio should be mono channel.")
    # Reject anything that still holds more than one value per sample
    # (e.g. extra trailing axes), which the two checks above let through.
    if samples.size != orig_shape[0]:
        raise ValueError("Audio should be mono channel.")
    # Flatten first so (num_samples, 1) input works too; previously it
    # passed validation and then failed to broadcast into the 1-D output.
    flat = np.asarray(samples, dtype=np.float64).reshape(-1)
    # np.tile returns a new array, so the caller's data is never aliased.
    return np.tile(flat, repeat_n_times)

## PROGRAMMING CHALLENGE: Normalize Audio between -1 and 1

In [None]:
def norm_samples(samples):
    """Scales the sound to be between -1 and 1.

    Every sample is divided by the largest absolute value found in
    `samples`, so the loudest sample (positive or negative) ends up at
    exactly 1 or -1. The input is not modified; a new array is returned.

    Empty or all-zero input is returned unscaled (as a new array) rather
    than dividing by zero.
    """
    if samples.size == 0:
        return samples / 1.0
    # Use the larger of |max| and |min|: dividing by max() alone lets
    # negative peaks exceed -1 and flips the sign of all-negative signals.
    # Converting to Python floats first avoids integer overflow when taking
    # the absolute value of the most negative integer (e.g. int16 -32768).
    peak = max(abs(float(samples.max())), abs(float(samples.min())))
    if peak == 0:
        # Pure silence: nothing to scale, and dividing would produce NaNs.
        return samples / 1.0
    return samples / peak

## PROGRAMMING CHALLENGE: Normalize Audio between -1 and 1, preserving the original volume


In [1]:
def norm_samples(samples):
    """Scales the sound to be between -1 and 1.

    The divisor depends only on the dtype of `samples`, not on the signal's
    own peak, so quiet recordings stay quiet: int16 data is divided by
    32767, int32 data by 2147483647, and any other dtype by 1.

    Note that the most negative integer value (e.g. -32768 for int16) maps
    to slightly below -1 because the divisor is the positive maximum.
    """
    # (dtype name, largest positive value representable in that dtype)
    integer_full_scale = (
        ("int16", 32767),
        ("int32", 2147483647),
    )
    for dtype_name, full_scale in integer_full_scale:
        if samples.dtype == dtype_name:
            break
    else:
        # Any other dtype is divided by 1, i.e. left at its current scale.
        full_scale = 1
    return samples.copy() / full_scale

## PROGRAMMING CHALLENGE: Make stereo audio mono

In [None]:
def stereo2mono(samples):
    """Averages the channels together to form mono channel data.

    Expects data shaped (num_samples, num_channels); each output sample is
    the sum of that sample's channels divided by the number of channels.
    Data that is already 1-D is returned as an unchanged copy. The input is
    never modified.

    A warning is issued when the first axis is not the longest one, since
    the data then looks like (num_channels, num_samples); processing still
    continues using the first axis as the sample axis.

    Resources:
       https://github.com/microsoft/MS-SNSD/blob/master/audiolib.py 
    """
    data = samples.copy()
    if data.ndim < 2:
        # Already mono: without this guard the averaging below would
        # collapse the whole signal into a single scalar mean.
        return data
    if np.argmax(data.shape) != 0:
        import warnings
        msg = "Sample data expects (num_samples, num_channels). "+\
            f"Data supplied has the shape {data.shape}, which looks like "+\
                "(num_channels, num_samples). Perhaps check your audio."
        warnings.warn(msg)
    # Transpose so the channel axis comes first, then average across it.
    data = data.T
    data = data.sum(axis=0)/data.shape[0]
    return data

## PROGRAMMING CHALLENGE: Visualize features according to research

In [None]:
# Extract MFCC features from the speech samples using fixed settings:
# 40 coefficients and a 25 ms window.
# NOTE(review): percent_overlap=0.4 presumably means 40% overlap between
# consecutive windows -- confirm against the sp.feats.get_mfcc docs.
# NOTE(review): this cell only computes the features; no plotting call
# follows here even though the heading mentions visualizing them.
mfcc_paper = sp.feats.get_mfcc(
    samples_speech, 
    sr=sr_speech, 
    num_mfcc=40, 
    win_size_ms=25, 
    percent_overlap=0.4
)