In [27]:
import os
import librosa
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import librosa.display

In [17]:
# Project root is one level above the current working directory;
# every sub-folder used by the notebook hangs off it.
parent_dir = os.path.abspath('..')
test_dir, data_dir, EDA_dir, audioAnalysis_dir = (
    os.path.join(parent_dir, folder)
    for folder in ('Testing', 'Data', 'EDA', 'AudioAnalysisFunctions')
)

In [22]:
def get_melspectogram(file_path, window, overlap=None, n_fft=None):
    """Compute the mel spectrogram of an audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file; it is loaded with ``librosa.load`` at its
        native sampling rate (``sr=None``).
    window : float
        Analysis window duration in seconds (converted to samples with the
        file's sampling rate).
    overlap : float, optional
        Fraction of the window shared by two consecutive frames, in
        ``[0, 1)``. Defaults to 0.5.
    n_fft : int, optional
        FFT size. Defaults to the smallest power of two that is greater than
        or equal to the window length in samples.

    Returns
    -------
    mel_spect : np.ndarray
        Mel spectrogram as returned by ``librosa.feature.melspectrogram``.
    mel_spect_db : np.ndarray
        ``mel_spect`` converted to decibels relative to its maximum.
    params : dict
        The settings actually used: ``window``, ``window_length``,
        ``overlap``, ``overlap_length``, ``hop_length`` and ``n_fft``.

    Raises
    ------
    ValueError
        If ``overlap`` is outside ``[0, 1)`` or the window is shorter than
        one sample.
    """
    # Default overlap is 50% of the window size
    if overlap is None:
        overlap = 0.5
    if not 0 <= overlap < 1:
        raise ValueError(f"overlap must be in [0, 1), got {overlap!r}")

    # Load the signal at its native sampling rate
    signal, fs = librosa.load(file_path, sr=None)

    # Window length in samples
    window_length = int(window * fs)
    if window_length < 1:
        raise ValueError(
            f"window={window!r} s is shorter than one sample at fs={fs}"
        )

    # Overlap in samples
    overlap_length = int(window_length * overlap)

    # Hop length in samples: the step between frame starts is the part of the
    # window that is NOT shared with the next frame. Clamped to 1 so float
    # rounding of the overlap can never produce a zero step.
    hop_length = max(1, window_length - overlap_length)

    # Default n_fft is the smallest power of 2 >= window_length
    # (integer arithmetic, so no float rounding is involved)
    if n_fft is None:
        n_fft = 1 << (window_length - 1).bit_length()

    # Compute mel spectrogram
    mel_spect = librosa.feature.melspectrogram(y=signal, sr=fs,
                                               win_length=window_length,
                                               hop_length=hop_length,
                                               n_fft=n_fft)
    # Mel spectrogram in decibels
    mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)

    params = {
        'window': window,
        'window_length': window_length,
        'overlap': overlap,
        'overlap_length': overlap_length,
        'hop_length': hop_length,
        'n_fft': n_fft,
    }

    return mel_spect, mel_spect_db, params

In [19]:
# Read the fan subset listing stored in the Testing folder
fan_subset_csv = os.path.join(test_dir, 'fan_subset.csv')
fan_test_set = pd.read_csv(fan_subset_csv)

In [20]:
# Path of the first recording in the subset
fan_test_set['file_path'].iloc[0]

'C:\\Users\\aubin\\beCode\\anomalyDetection\\AudioMalfunctionDetection\\Data\\fan\\id_00\\abnormal\\00000020.wav'

In [49]:
# Try the feature extraction on the first recording with a 1-second window
first_wav = fan_test_set.iloc[0]['file_path']
mel_spect, mel_spect_db, params = get_melspectogram(first_wav, window=1)

In [55]:
all_mel_spect, all_mel_spect_db, all_params = [], [], []

# One mel spectrogram (power and dB) per listed recording, 1-second window
for wav_path in fan_test_set['file_path']:
    mel_spect, mel_spect_db, params = get_melspectogram(wav_path, window=1)
    all_mel_spect.append(mel_spect)
    all_mel_spect_db.append(mel_spect_db)
    all_params.append(params)

In [59]:
# Attach the per-file results to the frame as three new columns (in this order)
for column, values in (('mel_spect', all_mel_spect),
                       ('mel_spect_db', all_mel_spect_db),
                       ('all_params', all_params)):
    fan_test_set[column] = values

In [60]:
# Display the DataFrame: as the last expression of the cell it is rendered as the cell output
fan_test_set

Unnamed: 0.1,Unnamed: 0,anomaly,machine_type,model_type,length,filename,file_path,mel_spect,mel_spect_db,all_params
0,20,1,fan,id_00,10.0,00000020.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[5.618998, 3.2998428, 5.666052, 4.8847017, 5....","[[-14.090825, -16.402481, -14.054608, -14.6990...","{'window': 1, 'window_length': 16000, 'overlap..."
1,194,1,fan,id_00,10.0,00000194.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[4.580413, 2.4774358, 3.927845, 2.3824515, 2....","[[-6.4593463, -9.128368, -7.126849, -9.298152,...","{'window': 1, 'window_length': 16000, 'overlap..."
2,55,1,fan,id_00,10.0,00000055.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[4.183343, 4.457312, 4.135027, 4.5470915, 4.7...","[[-15.383171, -15.107676, -15.433622, -15.0210...","{'window': 1, 'window_length': 16000, 'overlap..."
3,204,1,fan,id_00,10.0,00000204.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[4.1052003, 4.671229, 4.3043075, 2.7643504, 4...","[[-8.204632, -7.643664, -7.998943, -9.922045, ...","{'window': 1, 'window_length': 16000, 'overlap..."
4,173,1,fan,id_00,10.0,00000173.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[2.533962, 2.141419, 2.2395484, 2.5685048, 2....","[[-10.122735, -10.85372, -10.659132, -10.06393...","{'window': 1, 'window_length': 16000, 'overlap..."
...,...,...,...,...,...,...,...,...,...,...
1595,4712,0,fan,id_06,10.0,00000177.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[0.11784176, 0.17334014, 0.27341798, 0.161433...","[[-24.063358, -22.387358, -20.408077, -22.6964...","{'window': 1, 'window_length': 16000, 'overlap..."
1596,4810,0,fan,id_06,10.0,00000275.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[0.09672496, 0.067297466, 0.043961555, 0.0370...","[[-24.887087, -26.462486, -28.31174, -29.05730...","{'window': 1, 'window_length': 16000, 'overlap..."
1597,5100,0,fan,id_06,10.0,00000565.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[1.2709379, 1.165696, 0.40480444, 0.26515284,...","[[-13.81316, -14.18855, -18.781952, -20.61944,...","{'window': 1, 'window_length': 16000, 'overlap..."
1598,4548,0,fan,id_06,10.0,00000013.wav,C:\Users\aubin\beCode\anomalyDetection\AudioMa...,"[[0.23120412, 0.15911241, 0.24410091, 0.200400...","[[-22.021666, -23.64458, -21.785927, -22.64262...","{'window': 1, 'window_length': 16000, 'overlap..."


In [68]:
# Number of values in the first dB spectrogram (rows x columns of its 2-D array)
first_spect_db = fan_test_set.iloc[0]['mel_spect_db']
n_rows, n_cols = first_spect_db.shape[:2]
total_size = n_rows * n_cols

In [101]:
# Feature matrix as a list: each dB spectrogram is copied into an array and
# flattened into a single 1-D vector
X = [np.array(spect_db).flatten() for spect_db in fan_test_set['mel_spect_db']]

In [125]:
# NOTE(review): within the visible notebook, y_train is first assigned by the
# train_test_split call in a later cell, so this line raises NameError when the
# cells are run top-to-bottom on a fresh kernel — confirm the intended cell order.
len(y_train)

320

In [115]:
# Target vector: the 'anomaly' column of the subset
y = fan_test_set.loc[:, 'anomaly']

In [126]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split reproducible between runs
split_parts = train_test_split(X, y, test_size=0.2, random_state=2)
X_train, X_test, y_train, y_test = split_parts

In [127]:
from sklearn.svm import OneClassSVM
from sklearn.datasets import make_blobs
from numpy import quantile, where, random
import matplotlib.pyplot as plt

In [129]:
from sklearn.neighbors import KNeighborsClassifier

# Supervised baseline: 3-nearest-neighbours on the flattened dB spectrograms
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Labels predicted for the held-out samples
predicted = model.predict(X_test)


[1 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 1 0 0 1 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1
 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0
 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0
 1 1 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 1
 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1
 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0
 0 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 1
 1 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0]


In [130]:
from sklearn import metrics

# Share of held-out samples whose predicted label equals the true label
accuracy = metrics.accuracy_score(y_test, predicted)
print("Accuracy:", accuracy)

Accuracy: 0.975


In [133]:
# One-class SVM fitted on the training features only (labels are not used),
# then applied back to the same training samples.
# NOTE(review): X_train comes from a split of the full subset, so it presumably
# mixes normal and abnormal recordings — confirm this is intended.
svm_settings = {'kernel': 'rbf', 'gamma': 0.001, 'nu': 0.01}
svm = OneClassSVM(**svm_settings).fit(X_train)
pred = svm.predict(X_train)

In [139]:
from collections import Counter

# Tally how many training samples received each predicted label
pred_counts = Counter(pred)
print(pred_counts)

Counter({-1: 799, 1: 481})


In [None]:
# X_train is a plain Python list of 1-D arrays (it is split from the list X),
# so stack it into a 2-D array before using column indexing.
# Assumes every flattened spectrogram has the same length — TODO confirm.
X_train_arr = np.asarray(X_train)

# OneClassSVM.predict labels outliers -1 and inliers +1; keep the row
# positions of the samples flagged as outliers.
anom_index = np.where(pred == -1)[0]
X_anomaly = X_train_arr[anom_index]

# Scatter of the first two feature columns, flagged samples drawn in red
plt.scatter(X_train_arr[:, 0], X_train_arr[:, 1], label='all training samples')
plt.scatter(X_anomaly[:, 0], X_anomaly[:, 1], color='r', label='flagged as outlier')
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.legend()
plt.show()