In [49]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score
import librosa
import numpy as np

In [50]:
try:
    data = pd.read_pickle('data.pkl')
except:
    print('data frame not found',
          'run prepare_dataframe.py')
    
scoring = []
frame_length = 8000

In [52]:
def expand(column):
    '''
    elements in column are assumed to be np.arrays
    returns series of splitted np.arrays (with overlap!)

    '''
    col = column.apply(librosa.util.frame, 
                 frame_length=frame_length, 
                 hop_length=frame_length)
    col = col.apply(np.transpose).explode()
    return col

def mfccify(x):
    '''
    return mfcc of vector np.array x
    '''
    xmfcc = librosa.feature.mfcc(y=x, sr=8000,
                                hop_length=1024, 
                                htk=True).flatten()
    return xmfcc

def cqtify(x):
    '''
    return cqt of vector np.array x
    '''
    xcqt = librosa.core.cqt(y=x, sr=8000,
                             hop_length=1024,nbins=3).flatten()

def sum_threshold(x, threshold=1600):
    '''
    x is assumed to be vector of 0s and 1s
    returns 1 if more than threshold entries are active
    '''
    x = np.sum(x) > threshold
    return float(x)

def convert_to_numpy(series):
    '''
    takes pandas series of np.arrays
    returns the np.matrix equivalent
    '''
    m, n = series.shape[0], series.iloc[0].shape[0]
    D = np.zeros([m,n])
    for idx, data in enumerate(series):
        D[idx,:] = data
    return D

def evaluate(model,X_test,y_test):
    '''
    return testset roc_auc, accuracy and confusion matrix
    '''
    y_predict = model.predict(X_test)
    roc = roc_auc_score(y_test, y_predict)
    acc = accuracy_score(y_test, y_predict)
    conf = confusion_matrix(y_test, y_predict)
    return roc, acc, conf

In [None]:
for _ in range(30):
    '''
    cross validation of support vector machine on mfccs
    whole data events (one train passes) are left out
    for robust estimation (possible bleed of train info -> test info impossible)
    '''
    data_test = data.sample(5) # take 5 random rows
    data_train = data.drop(data_test.index) # drop test from train

    # expand, mfccify and convert to numpy
    X_train = convert_to_numpy((expand(data_train['audio_content'])
               .apply(mfccify)))
    y_train = (expand(data_train['target_vector'])
               .apply(sum_threshold)
               .to_numpy())

    X_test = convert_to_numpy((expand(data_test['audio_content'])
               .apply(mfccify)))
    y_test = (expand(data_test['target_vector'])
              .apply(sum_threshold)
              .to_numpy())


    # support vector machine with radial basis with short kernel width (high complexity)
    # use class weights to tackle imbalanced classes
    clf = SVC(C=10, gamma=1e-06, 
              kernel='linear', tol=0.001,
              class_weight='balanced',
              probability=True)

    # train / test model
    clf.fit(X_train,y_train)
    scoring.append(evaluate(clf, X_test, y_test))

    # print roc_auc
    print(scoring[-1][0])
    conf_mats = []
    for i in range(len(scoring)):
        conf_mats.append(scoring[i][2])
    conf_mats = np.array(conf_mats)
    print(conf_mats.sum(axis=0))

0.6604122621564482
[[1537   74]
 [  97   38]]
0.6900386847195358
[[1711   88]
 [ 109   48]]
0.5848639455782313
[[1888  107]
 [ 120   52]]


In [43]:
conf_mat = conf_mats.sum(axis=0)

In [44]:
np.round(conf_mat * 100 / np.sum(conf_mat),2) # confusion matrix in percentage

array([[70.58, 19.6 ],
       [ 1.85,  7.96]])

In [45]:
np.mean([run[0] for run in scoring]) # mean roc_auc

0.785069825688323

# REPORT:

we find a mean roc_auc of approximately 0.80.


Interpreting the confusion matrix we see :

- a 1.5 % chance of
our classifier to miss a faulty train. (False Positive)

- there is a 19.3 % chance of a train classified
as faulty, to actually be without harm. (False Negative)

- 71.1 % percent of the audio was correctly classified as unsuspicious.

- 8.2 % percent of the audio was correctly classified as suspicious