In [1]:
import librosa
import librosa.display
import pandas as pd
import numpy as np
import math

In [2]:
# Number of samples in each analysis frame; passed to the feature helpers below
# as their frame/window size.
FRAME_LENGTH = 1024
# Number of samples between the starts of two consecutive frames.
HOP_LENGTH = 512

In [3]:
#CLEANING/PREPROCESSING STEP 1: Trim each audio clip from its original length (x minutes) down to 10 seconds

# This step was done outside of the notebook to free space in
# the GitHub repository. To see the operation, view the utility.py
# file.

In [4]:
#CLEANING/PREPROCESSING STEP 2: Remove ambient noise (e.g. crowd cheering, clapping, wind)
# Caveat: this step only works when the magnitude of the ambience is lower than
# that of the speaker. For this reason, only some files can be cleaned
# in this manner.

def removeAmbience(inputWav):
    """Load an audio file and return it with the ambient background attenuated.

    The magnitude spectrogram is compared with a median-aggregated
    nearest-neighbour filtered copy of itself (cosine metric). A soft mask
    built from the difference keeps the foreground, and the masked magnitude
    is recombined with the original phase and inverted back to a waveform.

    Parameters
    ----------
    inputWav :
        Passed unchanged to ``librosa.load``.

    Returns
    -------
    tuple
        ``(signal, sampleRate)`` where ``signal`` has exactly as many samples
        as the audio returned by ``librosa.load`` and ``sampleRate`` is the
        rate reported by ``librosa.load``.
    """
    audioArray, sampleRate = librosa.load(inputWav)

    # filter out the most minimal signals
    magSpec, phase = librosa.magphase(librosa.stft(audioArray)) # D = S*P
    # neighbour frames within a 2 second window are excluded from the aggregation
    specFilter = librosa.decompose.nn_filter(magSpec, aggregate=np.median,
                    metric='cosine', width=int(librosa.time_to_frames(2, sr=sampleRate)))
    # the background estimate may never exceed the observed magnitude,
    # which also keeps (magSpec - specFilter) non-negative for the mask below
    specFilter = np.minimum(magSpec, specFilter)

    # apply the foreground mask over the spectrogram magnitude
    # (the ambient mask was computed but never used, so it is no longer built)
    foregroundMargin = 10
    foregroundMask = librosa.util.softmask((magSpec-specFilter),
                        (foregroundMargin*specFilter), power=2)
    foregroundSpec = foregroundMask*magSpec

    # reconstruct foreground signal
    complexSpec = foregroundSpec*phase

    #CLEANING/PREPROCESSING STEP 11: pad lost samples from reconstruction
    reconstructSignal = librosa.istft(complexSpec)
    padding = len(audioArray)-len(reconstructSignal)
    if padding >= 0:
        reconstructPadded = np.pad(reconstructSignal, (0, padding), 'constant', constant_values=(0, 0))
    else:
        # np.pad rejects negative widths; clip instead so the output length
        # always equals the input length
        reconstructPadded = reconstructSignal[:len(audioArray)]

    return reconstructPadded, sampleRate

In [None]:
#CLEANING/PREPROCESSING STEP 3: Retrieve amplitude envelope

def amplitudeEnvelope(signal, frameLength, hopLength):
    """Return the maximum sample value of each frame of ``signal``.

    Frames start at 0, ``hopLength``, ``2*hopLength``, ... and span up to
    ``frameLength`` samples; frames near the end are shorter because the
    signal is not padded. The maximum is taken over the signed samples
    (no absolute value is applied).

    Parameters
    ----------
    signal : 1-D sequence of samples.
    frameLength : number of samples per frame.
    hopLength : number of samples between consecutive frame starts.

    Returns
    -------
    numpy.ndarray
        One value per frame; empty when ``signal`` is empty.
    """
    samples = np.asarray(signal)
    # np.max reduces each frame in C instead of iterating the slice in Python
    return np.array([
        np.max(samples[start:start + frameLength])
        for start in range(0, len(samples), hopLength)
    ])

In [6]:
#CLEANING/PREPROCESSING STEP 4: Retrieve Root Mean Square Energy

def rms(signal, frameLength, hopLength):
    """Return the root-mean-square energy of ``signal`` from ``librosa.feature.rms``.

    ``frameLength`` and ``hopLength`` are forwarded as ``frame_length`` and
    ``hop_length``; only the first row of librosa's result is returned.
    """
    energy = librosa.feature.rms(
        y=signal,
        frame_length=frameLength,
        hop_length=hopLength,
    )
    return energy[0]

In [7]:
#CLEANING/PREPROCESSING STEP 5: Retrieve Zero Crossing Rate

def zcr(signal, frameLength, hopLength):
    """Return the zero-crossing rate of ``signal`` from ``librosa.feature.zero_crossing_rate``.

    ``frameLength`` and ``hopLength`` are forwarded as ``frame_length`` and
    ``hop_length``; only the first row of librosa's result is returned.
    """
    crossings = librosa.feature.zero_crossing_rate(
        y=signal,
        frame_length=frameLength,
        hop_length=hopLength,
    )
    return crossings[0]

In [8]:
#CLEANING/PREPROCESSING STEP 6: Retrieve Mel Frequency Cepstrum Coefficients

def mfcc20(signal, sampleRate):
    """Return 20 MFCCs of ``signal`` as computed by ``librosa.feature.mfcc``.

    The full result is returned (it is iterated one coefficient row at a
    time by the caller). No frame or hop size is passed here, so librosa's
    own defaults apply.
    """
    coefficients = librosa.feature.mfcc(y=signal, sr=sampleRate, n_mfcc=20)
    return coefficients

In [9]:
#CLEANING/PREPROCESSING STEP 7: Retrieve Spectral Centroid

def spectralCentroid(signal, sampleRate, frameLength, hopLength):
    """Return the spectral centroid of ``signal`` from ``librosa.feature.spectral_centroid``.

    ``frameLength`` is forwarded as ``n_fft`` and ``hopLength`` as
    ``hop_length``; only the first row of librosa's result is returned.
    """
    centroid = librosa.feature.spectral_centroid(
        y=signal,
        sr=sampleRate,
        n_fft=frameLength,
        hop_length=hopLength,
    )
    return centroid[0]

In [10]:
#CLEANING/PREPROCESSING STEP 8: Retrieve Spectral Bandwidth

def spectralBandwidth(signal, sampleRate, frameLength, hopLength):
    """Return the spectral bandwidth of ``signal`` from ``librosa.feature.spectral_bandwidth``.

    ``frameLength`` is forwarded as ``n_fft`` and ``hopLength`` as
    ``hop_length``; only the first row of librosa's result is returned.
    """
    bandwidth = librosa.feature.spectral_bandwidth(
        y=signal,
        sr=sampleRate,
        n_fft=frameLength,
        hop_length=hopLength,
    )
    return bandwidth[0]

In [11]:
#CLEANING/PREPROCESSING STEP 9: Retrieve Spectral Rolloff
def spectralRolloff(signal, sampleRate, frameLength, hopLength):
    """Return the spectral rolloff of ``signal`` from ``librosa.feature.spectral_rolloff``.

    ``frameLength`` is forwarded as ``n_fft`` and ``hopLength`` as
    ``hop_length``; only the first row of librosa's result is returned.
    """
    rolloff = librosa.feature.spectral_rolloff(
        y=signal,
        sr=sampleRate,
        n_fft=frameLength,
        hop_length=hopLength,
    )
    return rolloff[0]

In [12]:
def normalize(array):
    """Min-max scale ``array`` so its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    array : array-like of numbers.

    Returns
    -------
    ``(array - min) / (max - min)``. Two degenerate inputs are handled
    explicitly instead of producing NaNs or raising:

    * an empty input returns an empty float array;
    * an input whose values are all equal returns zeros of the same shape
      (the plain formula would divide zero by zero).
    """
    if np.size(array) == 0:
        # np.min / np.max raise on empty input
        return np.asarray(array, dtype=float)

    low = np.min(array)
    span = np.max(array) - low
    if span == 0:
        # constant input: every value sits at the minimum
        return np.zeros_like(array, dtype=float)
    return (array - low) / span

In [None]:
# This will be the final function
def wavToDataFrame(inputWav, filename, intelligence, filterAmbience=False):
    """Extract per-frame audio features from one file into a DataFrame.

    Parameters
    ----------
    inputWav :
        Audio file handed to ``removeAmbience`` or ``librosa.load``.
    filename :
        Value written to every row of the ``filename`` column.
    intelligence :
        Class label written to every row of the ``intelligence`` column.
    filterAmbience : bool, default False
        When true the audio is loaded through ``removeAmbience`` instead of
        ``librosa.load``.

    Returns
    -------
    pandas.DataFrame
        One row per frame with the columns ``frameID``, ``filename``,
        ``intelligence``, ``amplitudeEnvelope``, ``RMSE``, ``ZCR``,
        ``MFCC1`` ... ``MFCC20``, ``spectralCentroid``, ``spectralBandwidth``
        and ``spectralRolloff``. Every feature column is passed through
        ``normalize`` independently, using only this file's values.

    Raises
    ------
    ValueError
        If the feature extractors disagree on the number of frames by more
        than one frame (which would mean their hop sizes differ).
    """
    if filterAmbience:
        audioArray, sampleRate = removeAmbience(inputWav)
    else:
        audioArray, sampleRate = librosa.load(inputWav)

    # Raw per-frame features, inserted in the order of the output columns.
    features = {
        "amplitudeEnvelope": amplitudeEnvelope(audioArray, FRAME_LENGTH, HOP_LENGTH),
        "RMSE": rms(audioArray, FRAME_LENGTH, HOP_LENGTH),
        "ZCR": zcr(audioArray, FRAME_LENGTH, HOP_LENGTH),
    }
    for index, coefficient in enumerate(mfcc20(audioArray, sampleRate)):
        features["MFCC"+str(index+1)] = coefficient
    features["spectralCentroid"] = spectralCentroid(audioArray, sampleRate, FRAME_LENGTH, HOP_LENGTH)
    features["spectralBandwidth"] = spectralBandwidth(audioArray, sampleRate, FRAME_LENGTH, HOP_LENGTH)
    features["spectralRolloff"] = spectralRolloff(audioArray, sampleRate, FRAME_LENGTH, HOP_LENGTH)

    # The frame count is taken from the features themselves rather than from
    # ceil(len(audio)/512): when the sample count is an exact multiple of the
    # hop, amplitudeEnvelope yields one frame fewer than the librosa
    # extractors and assigning the columns would fail on a length mismatch.
    lengths = [len(values) for values in features.values()]
    frameCount = min(lengths)
    if max(lengths) - frameCount > 1:
        raise ValueError(
            "feature frame counts differ by more than one frame: "
            + str(sorted(set(lengths)))
        )

    #CLEANING/PREPROCESSING STEP 9: Include Binary Classifier
    data = {
        "frameID": range(frameCount),
        "filename": [filename] * frameCount,
        "intelligence": [intelligence] * frameCount,
    }

    #CLEANING/PREPROCESSING STEP 10: Normalize Arrays
    for column, values in features.items():
        data[column] = normalize(values[:frameCount])

    return pd.DataFrame(data=data)

In [14]:
# (path, filename column value, label, filterAmbience) for every clip, in output order
clipSpecs = [
    ("AudioData/biden-human.wav", "biden-human.wav", "Human", False),
    ("AudioData/biden-to-obama-ai.wav", "biden-to-obama-ai.wav", "AI", False),
    ("AudioData/linus-human.wav", "linus-human.wav", "Human", False),
    ("AudioData/linus-to-ryan-ai.wav", "linus-to-ryan-ai.wav", "AI", False),
    ("AudioData/margot-human.wav", "margot-human.wav", "Human", True),
    ("AudioData/margot-to-trump-ai.wav", "margot-to-trump-ai.wav", "AI", True),
    ("AudioData/taylor-human.wav", "taylor-human.wav", "Human", True),
    ("AudioData/taylor-to-margot-ai.wav", "taylor-to-margot-ai.wav", "AI", True),
]

# Build one feature frame per clip, processed in the order listed above
clipFrames = [
    wavToDataFrame(clipPath, clipName, clipLabel, filterAmbience=denoise)
    for clipPath, clipName, clipLabel, denoise in clipSpecs
]

# Keep the individual frames available under their own names
(bidenDF, bidenAiDF, linusDF, linusAiDF,
 margotDF, margotAiDF, taylorDF, taylorAiDF) = clipFrames

# Concat dataframes
df = pd.concat(clipFrames)

# create CSV
df.to_csv("./wavData.csv", index=False)