In [1]:
import pandas as pd
import librosa
import numpy as np

In [20]:
# Feature extraction is run one machine type at a time (swap the CSV name per
# run) so the work can be spread over separate sessions.
# The loaded frame must expose a `path` column: load_features() iterates over it.
df = pd.read_csv('valve_general.csv')

In [3]:
def load_sound(p):
    """Read the audio file at ``p`` and return only its samples.

    ``sr=None`` asks librosa to keep the file's own sampling rate instead of
    resampling. The rate that librosa reports is discarded here, so callers
    never see it.
    """
    samples, _native_sr = librosa.load(p, sr=None)
    return samples
    
def chromagram(y, sr):
    """Summarise the chroma content of ``y`` as four numbers.

    The magnitude STFT of ``y`` is handed to ``librosa.feature.chroma_stft``.
    The result is averaged over its last axis (time frames, per librosa's
    documented output layout), giving one value per chroma bin.

    Returns:
        1-D array ``[min, max, mean, std]`` of the per-bin averages.
    """
    magnitude = np.abs(librosa.stft(y))
    chroma_frames = librosa.feature.chroma_stft(S=magnitude, sr=sr)
    # Transpose-then-reduce over axis 0 == average across the last axis.
    per_bin = chroma_frames.T.mean(axis=0)
    return np.array([per_bin.min(), per_bin.max(), per_bin.mean(), per_bin.std()])

def melspec(y, sr):
    """Summarise the mel spectrogram of ``y`` as four numbers.

    ``librosa.feature.melspectrogram`` is averaged over its last axis (time
    frames, per librosa's documented output layout), leaving one value per
    mel band.

    Returns:
        1-D array ``[min, max, mean, std]`` of the per-band averages.
    """
    mel_frames = librosa.feature.melspectrogram(y=y, sr=sr)
    band_means = mel_frames.T.mean(axis=0)
    summary = (
        np.min(band_means),
        np.max(band_means),
        np.mean(band_means),
        np.std(band_means),
    )
    return np.array(summary)
 
def mfcc(y, sr):
    """Summarise the MFCCs of ``y`` as four numbers.

    The output of ``librosa.feature.mfcc`` is averaged over its last axis
    (time frames, per librosa's documented output layout), leaving one value
    per coefficient.

    Returns:
        1-D array ``[min, max, mean, std]`` of the per-coefficient averages.
    """
    coeff_frames = librosa.feature.mfcc(y=y, sr=sr)
    coeff_means = coeff_frames.T.mean(axis=0)
    lowest, highest = coeff_means.min(), coeff_means.max()
    centre, spread = coeff_means.mean(), coeff_means.std()
    return np.array([lowest, highest, centre, spread])

def centroid(y, sr):
    cent=librosa.feature.spectral_centroid(y=y, sr=sr)
    cent_f= np.array((np.amin(cent), np.amax(cent), np.mean(cent), np.std(cent)))
    return cent_f

def onset_number(y, sr=22050):
    """Count the onsets librosa detects in ``y``.

    Args:
        y: audio samples.
        sr: sampling rate passed to ``librosa.onset.onset_detect``. It used
            to be hard-coded to 22050; that value is kept as the default so
            existing ``onset_number(y)`` calls behave exactly as before.
            NOTE(review): the other feature helpers in this file are driven
            with a different rate, and ``load_sound`` keeps the file's native
            rate -- confirm which rate should be used here.

    Returns:
        Number of detected onsets as a plain ``int``.
    """
    onsets = librosa.onset.onset_detect(y=y, sr=sr, units='time')
    return int(onsets.shape[0])
    
def amplitude_envelope(y, frame_size=1024, hop_length=512):
    """Summarise the amplitude envelope of ``y`` as four numbers.

    The envelope is the maximum sample value (signed, not absolute) of each
    frame. Frames start every ``hop_length`` samples and span up to
    ``frame_size`` samples; the last frames are shorter when they run past
    the end of the signal.

    Args:
        y: 1-D sequence of samples.
        frame_size: frame length in samples (default 1024, the former
            hard-coded value).
        hop_length: step between frame starts in samples (default 512, the
            former hard-coded value).

    Returns:
        1-D array ``[min, max, mean, std]`` of the per-frame maxima.

    Raises:
        ValueError: if ``y`` is empty (there is no frame to summarise).
    """
    samples = np.asarray(y)
    if samples.size == 0:
        raise ValueError("amplitude_envelope() requires a non-empty signal")
    # np.max runs in C and propagates NaN consistently; the builtin max()
    # iterates element by element in Python and is far slower on long signals.
    envelope = np.array([
        np.max(samples[start:start + frame_size])
        for start in range(0, len(samples), hop_length)
    ])
    return np.array((envelope.min(), envelope.max(), envelope.mean(), envelope.std()))

def rms(y):
    """Summarise the frame-wise RMS energy of ``y`` as four numbers.

    Returns:
        1-D array ``[min, max, mean, std]`` over every value returned by
        ``librosa.feature.rms``.
    """
    # `y` must be passed by keyword: librosa.feature.rms takes keyword-only
    # arguments in current releases, so the positional form raises TypeError.
    energy = librosa.feature.rms(y=y)
    return np.array((np.amin(energy), np.amax(energy), np.mean(energy), np.std(energy)))

def zcr(y):
    """Summarise the zero-crossing rate of ``y`` as four numbers.

    Returns:
        1-D array ``[min, max, mean, std]`` over every value returned by
        ``librosa.feature.zero_crossing_rate``.
    """
    crossings = librosa.feature.zero_crossing_rate(y)
    lowest = np.min(crossings)
    highest = np.max(crossings)
    return np.array([lowest, highest, crossings.mean(), crossings.std()])

def bandwith(y, sr=160000):
    """Summarise the spectral bandwidth of ``y`` as four numbers.

    Args:
        y: audio samples.
        sr: sampling rate handed to librosa. The former hard-coded value
            (160000) is kept as the default so ``bandwith(y)`` is unchanged.
            NOTE(review): ``load_sound`` keeps each file's native rate, so
            confirm that 160000 Hz really is the rate of the recordings.

    Returns:
        1-D array ``[min, max, mean, std]`` over every value returned by
        ``librosa.feature.spectral_bandwidth``.
    """
    # `y` must be passed by keyword: spectral_bandwidth takes keyword-only
    # arguments in current librosa releases, so the positional form raises
    # TypeError.
    bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return np.array((np.amin(bw), np.amax(bw), np.mean(bw), np.std(bw)))
    
def get_features(y, sr=160000, include_bandwidth=False):
    """Build the flat feature vector for one audio signal.

    With the default arguments the vector holds 29 values, in this order:
    chroma (4), mel (4), MFCC (4), spectral centroid (4), onset count (1),
    amplitude envelope (4), RMS (4), zero-crossing rate (4).

    Args:
        y: audio samples.
        sr: sampling rate forwarded to ``chromagram``, ``melspec``, ``mfcc``
            and ``centroid``. Defaults to the former hard-coded 160000.
            ``onset_number`` and ``bandwith`` are called with the signal only
            and therefore use whatever rate they define themselves.
            NOTE(review): ``load_sound`` keeps the file's native rate, so
            confirm 160000 Hz matches the recordings.
        include_bandwidth: when True, append the four spectral-bandwidth
            statistics (33 values in total). The bandwidth used to be
            computed for every file and then left out of the stacked vector;
            it is now skipped unless requested, which keeps the default
            output layout unchanged and avoids the wasted computation.

    Returns:
        1-D numpy array of feature values.
    """
    blocks = [
        chromagram(y, sr=sr),
        melspec(y, sr=sr),
        mfcc(y, sr=sr),
        centroid(y, sr=sr),
        onset_number(y),        # scalar; hstack promotes it to one element
        amplitude_envelope(y),
        rms(y),
        zcr(y),
    ]
    if include_bandwidth:
        blocks.append(bandwith(y))
    return np.hstack(blocks)


In [4]:
def load_features(paths=None, label_index=3):
    """Extract features and binary labels for a collection of audio files.

    Args:
        paths: iterable of file paths. When omitted, the ``path`` column of
            the notebook-level ``df`` is used, which is what this function
            always did before the parameter existed.
        label_index: position, after splitting the path on ``'/'``, of the
            component that names the condition. Defaults to 3, the value
            that used to be hard-coded.
            NOTE(review): this ties the labels to the exact directory depth
            stored in the CSV; a different layout yields wrong labels or an
            IndexError.

    Returns:
        ``(X, y)`` as numpy arrays: ``X`` holds one ``get_features`` vector
        per file, ``y`` holds 1 where the selected path component equals
        ``'normal'`` and 0 otherwise.

    Raises:
        IndexError: if a path has fewer than ``label_index + 1`` components.
    """
    if paths is None:
        paths = df.path.values
    feature_rows, labels = [], []
    for path in paths:
        feature_rows.append(get_features(load_sound(path)))
        labels.append(int(path.split('/')[label_index] == 'normal'))
    return np.array(feature_rows), np.array(labels)

In [5]:
# Build the feature matrix and the matching 0/1 labels; load_features() loads
# and processes one audio file per row of df.
features, condition = load_features()

In [6]:
# Wrap the extracted arrays in DataFrames so they can be written out as CSV.
# NOTE(review): the names say "pump" although the CSV loaded at the top of the
# notebook is 'valve_general.csv' -- confirm which machine this run is for.
df_pump_features = pd.DataFrame(data=features)
df_pump_target = pd.DataFrame(data=condition)

In [7]:
# Persist features and labels side by side; index=False leaves the row index
# out of the files.
# NOTE(review): the output names are fixed to "pump", so running this for
# another machine's CSV overwrites these files -- confirm before re-running.
df_pump_features.to_csv(path_or_buf='df_pump_feature.csv', index=False)
df_pump_target.to_csv(path_or_buf='df_pump_target.csv', index=False)