In [15]:
import librosa
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import IPython.display as ipd
import soundfile as sf
from scipy.signal import medfilt
import math

In [8]:
# CLEANING/PREPROCESSING STEP 1: trim each recording from its full length
# (x minutes) down to a 10-second clip

# This step was done outside of the notebook to save space in
# the GitHub repository. To see how the trimming is done, view the
# utility.py file.

In [23]:
# CLEANING/PREPROCESSING STEP 2: remove ambient noise (e.g. crowd cheering, clapping, wind)
# Caveat: this step only works when the ambience is quieter than the speaker. If the
# magnitude of the ambience is equal to or greater than that of the speaker, the two
# cannot be separated, so only some files can be cleaned in this manner.

def _plotAmbienceSpectra(audioArray, sampleRate, magSpec, ambientSpec, foregroundSpec):
    """Draw the input waveform and the full/background/foreground spectrograms."""
    # waveform of the untouched input; sr is passed so the time axis uses the
    # rate the audio was actually loaded at
    waveFig, waveAx = plt.subplots(figsize=(8, 4))
    librosa.display.waveshow(audioArray, sr=sampleRate, alpha=0.5, ax=waveAx)
    waveAx.set_title("Audio Signal")
    waveAx.set_ylabel("Amplitude")
    waveAx.set_ylim((-1, 1))

    # one panel per spectrogram, each scaled relative to its own peak
    panels = (
        ('Full spectrum', magSpec),
        ('Background', ambientSpec),
        ('Foreground', foregroundSpec),
    )
    specFig, specAxes = plt.subplots(len(panels), 1, figsize=(12, 8))
    for panelIdx, (specAx, (panelTitle, panelSpec)) in enumerate(zip(specAxes, panels)):
        isLastPanel = panelIdx == len(panels) - 1
        mesh = librosa.display.specshow(
            librosa.amplitude_to_db(panelSpec, ref=np.max),
            y_axis='log',
            # only the bottom panel carries the time axis
            x_axis='time' if isLastPanel else None,
            sr=sampleRate,
            ax=specAx,
        )
        specAx.set_title(panelTitle)
        clb = specFig.colorbar(mesh, ax=specAx, format="%+2.f")
        clb.ax.set_xlabel("dBFS")
    specFig.tight_layout()
    plt.show()


def removeAmbience(inputWav, ambientMargin=2, foregroundMargin=10, showPlots=True):
    """Suppress ambient noise in an audio file and return the foreground signal.

    The magnitude spectrogram is compared against a nearest-neighbour median
    filtered copy of itself; soft masks built from the two split the
    spectrogram into an ambient part and a foreground part, and the
    foreground part is inverted back to a time-domain signal.

    Parameters
    ----------
    inputWav :
        Audio source handed straight to ``librosa.load`` (default settings).
    ambientMargin : number, optional
        Scale applied to the residual when building the ambient mask.
        Defaults to 2, the value previously hard-coded.
    foregroundMargin : number, optional
        Scale applied to the filtered spectrogram when building the
        foreground mask. Defaults to 10, the value previously hard-coded.
    showPlots : bool, optional
        When True (default) the waveform and the three spectrograms are
        plotted. Pass False to skip all plotting, e.g. in batch runs.

    Returns
    -------
    The reconstructed foreground signal from ``librosa.istft``, with the same
    number of samples as the loaded input.
    """
    audioArray, sampleRate = librosa.load(inputWav)

    # split the STFT into magnitude and phase (D = S*P); the phase is reused
    # unchanged when rebuilding the signal
    magSpec, phase = librosa.magphase(librosa.stft(audioArray))

    # median-aggregate similar frames, skipping neighbours closer than 2 seconds
    filterWidth = int(librosa.time_to_frames(2, sr=sampleRate))
    specFilter = librosa.decompose.nn_filter(magSpec, aggregate=np.median,
                                             metric='cosine', width=filterWidth)
    # the filtered estimate may never exceed the observed magnitude
    specFilter = np.minimum(magSpec, specFilter)
    residualSpec = magSpec - specFilter

    # apply soft masks over the spectrogram magnitude
    ambientMask = librosa.util.softmask(specFilter,
                                        ambientMargin*residualSpec, power=2)
    foregroundMask = librosa.util.softmask(residualSpec,
                                           foregroundMargin*specFilter, power=2)
    ambientSpec = ambientMask*magSpec
    foregroundSpec = foregroundMask*magSpec

    if showPlots:
        _plotAmbienceSpectra(audioArray, sampleRate, magSpec,
                             ambientSpec, foregroundSpec)

    # reconstruct the foreground signal; length= pins the output to the input's
    # sample count, which istft does not otherwise guarantee
    complexSpec = foregroundSpec*phase
    reconstructSignal = librosa.istft(complexSpec, length=audioArray.shape[-1])
    return reconstructSignal

In [None]:
# This will be the final function
def wavToDataFrame(inputWav, outputDF):
    audioArray, sampleRate = librosa.load(inputWav)