In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import librosa
from glob import glob
import IPython.display as ipd
import librosa.display

In [28]:
# Reading the normal audio files in an array.
norm_audio_files = []
for i in range(0,7,2):
    data_dir = f"0_dB_slider/slider/id_0{i}/normal"
    norm_audio_files += glob(data_dir + '/*.wav')
x = len(norm_audio_files)
x   

3204

In [29]:
# Reading the abnormal audio files in an array.
abnorm_audio_files = []
for i in range(0,7,2):
    data_dir = f"0_dB_slider/slider/id_0{i}/abnormal"
    abnorm_audio_files += glob(data_dir + '/*.wav')
y = len(abnorm_audio_files)
y  

890

### Features extraction

In [30]:
def zero_crossing_rate(data):
    zero_crossings = librosa.feature.zero_crossing_rate(data)
    return zero_crossings

def spec_centroid(data, sr):
    spectral_centroids = librosa.feature.spectral_centroid(data, sr=sr)[0]
    return spectral_centroids

def spec_rolloff(data, sr):
    spectral_rolloff = librosa.feature.spectral_rolloff(data, sr=sr)[0]
    return  spectral_rolloff

def rmse(data):
    rms_audio = librosa.feature.rms(data)[0]
    return rms_audio

def flatness(data):
    flatness = librosa.feature.spectral_flatness(data)[0]
    return flatness

def amplitude_envelope(data):
    frame_size=1024
    hop_length=512
    ampli = np.array([max(data[i:i+frame_size]) for i in range(0, len(data),hop_length)])
    return ampli 

def mel_spectrogram(data,sr):
    mel_spec = librosa.feature.melspectrogram(y=data, sr=sr)
    return mel_spec

In [31]:
from tqdm import tqdm
extracted_feature = []

for file in tqdm(norm_audio_files):
    file_name = file
    class_label = 0 # 0 for normal.
    data, sr = librosa.load(file_name)
    y_min = data.min()
    y_max = data.max()
    y_mean = data.mean()
    y_std = data.std()
    
    zcr = zero_crossing_rate(data)
    zcr_mean = zcr.mean()
    zcr_max = zcr.max()
    zcr_min = zcr.min()
    
    spec_cent = spec_centroid(data,sr)
    spec_cent_min = spec_cent.min()
    spec_cent_mean = spec_cent.mean()

    spec_roll = spec_rolloff(data, sr)
    spec_roll_min = spec_roll.min()  
    spec_roll_mean = spec_roll.mean() 

    rms = rmse(data)
    rms_mean = rms.mean()
        
    flat = flatness(data)
    flat_mean = flat.mean()
    flat_max = flat.max()
        
    amplitude = amplitude_envelope(data)
    amp_mean = amplitude.mean()
    amp_min = amplitude.min()
        
    mel_spec = mel_spectrogram(data,sr)
    melspec_mean = mel_spec.mean()
    melspec_std = mel_spec.std()

    extracted_feature.append([file_name, y_min, y_max, y_mean, y_std, zcr_mean, zcr_min, zcr_max,
                              spec_cent_min, spec_cent_mean, spec_roll_min, spec_roll_mean, rms_mean,
                              flat_mean, flat_max, amp_mean, amp_min, melspec_mean, melspec_std, class_label])



100%|████████████████████████████████████████████████████████████| 3204/3204 [27:22<00:00,  1.95it/s]


In [32]:
for file in tqdm(abnorm_audio_files):
    file_name = file
    class_label = 1 # 1 for abnormal.
    data, sr = librosa.load(file_name)
    y_min = data.min()
    y_max = data.max()
    y_mean = data.mean()
    y_std = data.std()
    
    zcr = zero_crossing_rate(data)
    zcr_mean = zcr.mean()
    zcr_max = zcr.max()
    zcr_min = zcr.min()
    
    spec_cent = spec_centroid(data,sr)
    spec_cent_min = spec_cent.min()
    spec_cent_mean = spec_cent.mean()

    spec_roll = spec_rolloff(data, sr)
    spec_roll_min = spec_roll.min()  
    spec_roll_mean = spec_roll.mean() 

    rms = rmse(data)
    rms_mean = rms.mean()
        
    flat = flatness(data)
    flat_mean = flat.mean()
    flat_max = flat.max()
        
    amplitude = amplitude_envelope(data)
    amp_mean = amplitude.mean()
    amp_min = amplitude.min()
        
    mel_spec = mel_spectrogram(data,sr)
    melspec_mean = mel_spec.mean()
    melspec_std = mel_spec.std()

    extracted_feature.append([file_name, y_min, y_max, y_mean, y_std, zcr_mean, zcr_min, zcr_max,
                              spec_cent_min, spec_cent_mean, spec_roll_min, spec_roll_mean, rms_mean,
                              flat_mean, flat_max, amp_mean, amp_min, melspec_mean, melspec_std, class_label])


100%|██████████████████████████████████████████████████████████████| 890/890 [07:17<00:00,  2.03it/s]


In [33]:
extracted_features_df = pd.DataFrame(extracted_feature,columns=['file_name', 'y_min', 'y_max', 'y_mean', 'y_std', 
                                                                'zcr_mean', 'zcr_min', 'zcr_max','spec_cent_min', 
                                                                'spec_cent_mean', 'spec_roll_min', 'spec_roll_mean',
                                                                'rms_mean','flat_mean', 'flat_max', 'amp_mean', 
                                                                'amp_min', 'melspec_mean', 'melspec_std', 'class'])
extracted_features_df.head()

Unnamed: 0,file_name,y_min,y_max,y_mean,y_std,zcr_mean,zcr_min,zcr_max,spec_cent_min,spec_cent_mean,spec_roll_min,spec_roll_mean,rms_mean,flat_mean,flat_max,amp_mean,amp_min,melspec_mean,melspec_std,class
0,0_dB_slider/slider/id_00/normal/00000773.wav,-0.02687,0.028764,-1.398502e-06,0.005328,0.047631,0.02002,0.069336,1014.801047,1372.581294,2336.352539,3214.092862,0.005268,0.000342,0.007224,0.013499,0.00709,0.006609,0.029762,0
1,0_dB_slider/slider/id_00/normal/00000632.wav,-0.02404,0.028493,-4.029802e-06,0.005365,0.041871,0.019043,0.087402,720.012314,985.649834,1173.55957,2035.462247,0.00533,0.000147,0.001054,0.014137,0.007547,0.006682,0.024763,0
2,0_dB_slider/slider/id_00/normal/00000238.wav,-0.025671,0.027518,5.779765e-07,0.005878,0.045,0.019531,0.06543,919.668972,1086.078007,1991.821289,2356.461853,0.005845,0.000183,0.002889,0.0154,0.007993,0.008053,0.028967,0
3,0_dB_slider/slider/id_00/normal/00000148.wav,-0.022808,0.022871,-2.906805e-07,0.005626,0.055353,0.019531,0.078125,1165.522377,1394.776445,2540.917969,3284.712775,0.005581,0.000322,0.010397,0.01458,0.008242,0.007418,0.029873,0
4,0_dB_slider/slider/id_00/normal/00000388.wav,-0.023353,0.0236,-5.806836e-07,0.005836,0.040303,0.017578,0.058594,780.914492,938.608348,1582.69043,1914.206832,0.005797,0.000131,0.004567,0.015175,0.008566,0.007925,0.029198,0


In [34]:
df = extracted_features_df
df.to_csv("slider_features.csv" , index =False)