### Load training data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read both whitespace-delimited training matrices; file 4 is read first, then file 2.
trainData4, trainData2 = (
    np.loadtxt(train_path)
    for train_path in ('./ml/trainset/trainset_4.txt',
                       './ml/trainset/trainset_2.txt')
)

Augment the 10 dB SNR training set with the neutral events from the 20 dB SNR training set.

In [3]:
# Column labels: features are labelled 1..175, the final column 'ev' is the class label.
columns = list(range(1, 176)) + ['ev']

# The default index already numbers the rows 0..n-1, so no explicit index list is needed.
trainData2_df = pd.DataFrame(data=trainData2, columns=columns)
trainData4_df = pd.DataFrame(data=trainData4, columns=columns)

# Keep every row of set 2 and add only the rows of set 4 whose label is 1.0.
# pd.concat replaces DataFrame.append (removed in pandas 2.0); like append without
# ignore_index, it keeps the original row labels of both frames.
trainData = pd.concat([trainData2_df, trainData4_df[trainData4_df.ev == 1.0]])

### Machine learning

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm

# A fixed seed makes the fitted forest (and every score derived from it) repeatable
# across kernel restarts; the seed value itself is arbitrary.
ml = RandomForestClassifier(n_estimators=500, random_state=0)

# First 175 columns are the features, 'ev' is the target.
# DataFrame.as_matrix was removed in pandas 1.0; positional slicing + .values is equivalent.
X = trainData.iloc[:, 0:175].values
y = trainData.ev.values

ml = ml.fit(X, y)

### Detection

In [5]:
import scipy.stats.stats as st

# Import skew/kurtosis from the public namespace: reaching them through the
# `scipy.stats.stats` alias bound to `st` above is deprecated in recent SciPy releases.
from scipy.stats import kurtosis, skew

startMFCC = 0  # index of the first coefficient column included in every statistic below


def _mfcc_column_stat(mfccCoeffs, reducer):
    """Reduce every coefficient column of a window to a single value.

    Parameters
    ----------
    mfccCoeffs : 2-D array-like
        One row per frame, one column per coefficient.
    reducer : callable
        Function accepting ``(array, axis=0)`` and returning one value per column.

    Returns
    -------
    list
        One value per column, starting at column ``startMFCC``.

    Raises
    ------
    ValueError
        If the input is not 2-D or contains no frames (rows).
    """
    coeffs = np.asarray(mfccCoeffs)
    if coeffs.ndim != 2 or coeffs.shape[0] == 0:
        raise ValueError(
            "expected a non-empty 2-D array of MFCC frames, got shape {0}".format(coeffs.shape)
        )
    # startMFCC is looked up at call time, so reassigning the global takes effect immediately.
    return list(reducer(coeffs[:, startMFCC:], axis=0))


def minMFCC(mfccCoeffs):
    """Return the minimum of each coefficient column as a list."""
    return _mfcc_column_stat(mfccCoeffs, np.min)


def maxMFCC(mfccCoeffs):
    """Return the maximum of each coefficient column as a list."""
    return _mfcc_column_stat(mfccCoeffs, np.max)


def medianMFCC(mfccCoeffs):
    """Return the median of each coefficient column as a list."""
    return _mfcc_column_stat(mfccCoeffs, np.median)


def meanMFCC(mfccCoeffs):
    """Return the arithmetic mean of each coefficient column as a list."""
    return _mfcc_column_stat(mfccCoeffs, np.mean)


def varianceMFCC(mfccCoeffs):
    """Return the population variance (numpy default, ddof=0) of each coefficient column."""
    return _mfcc_column_stat(mfccCoeffs, np.var)


def skewnessMFCC(mfccCoeffs):
    """Return the sample skewness (scipy defaults) of each coefficient column as a list."""
    return _mfcc_column_stat(mfccCoeffs, skew)


def kurtosisMFCC(mfccCoeffs):
    """Return the excess (Fisher) kurtosis (scipy defaults) of each coefficient column."""
    return _mfcc_column_stat(mfccCoeffs, kurtosis)

In [None]:
def _window_features(window):
    """Build the classifier input vector for one window of MFCC frames.

    Concatenates the per-coefficient min, max, median, mean, variance, skewness
    and kurtosis of ``window``, in exactly that order.
    """
    window = np.asarray(window)
    features = []
    for stat in (minMFCC, maxMFCC, medianMFCC, meanMFCC,
                 varianceMFCC, skewnessMFCC, kurtosisMFCC):
        features.extend(stat(window))
    return features


validation = pd.read_csv('./ml/validation.csv', sep='\t', index_col=0)

result_df = pd.DataFrame(columns=['SNR', 'concurrency'])

big_window_size = 32                 # frames per analysis window
hop_size = big_window_size // 2      # consecutive windows overlap by half a window

for snr_num in range(6, 0, -1):
    # Collect plain rows and build the frame once per SNR level; appending through
    # DataFrame.loc inside the loop copies the frame on every insert.
    frame_rows = []

    # NOTE(review): range(1, 30) stops at clip 29 — confirm that is the whole test set,
    # because the score below divides by len(validation).
    for audio_num in range(1, 30):
        audio_mfcc = np.loadtxt('./MFCC/testing/{0}_{1}.txt'.format("%05d" % audio_num, snr_num))

        # One window is classified per validation row of this clip.
        val_len = len(validation[validation.audio_num == audio_num])
        window_features = [
            _window_features(audio_mfcc[start_frame:start_frame + big_window_size])
            for start_frame in range(0, val_len * hop_size, hop_size)
        ]
        if not window_features:
            continue  # no validation rows for this clip, nothing to classify

        # Classify all windows of the clip in one call instead of one call per window.
        predictions = ml.predict(np.array(window_features))

        for frame_num, predicted in enumerate(predictions):
            # Convert the scalar element, not a 1-element array (deprecated since NumPy 1.25).
            r_cl = int(predicted)
            # Class codes 1..4 become the neutral / glass / gunshot / scream flags.
            frame_rows.append([audio_num, frame_num, r_cl == 1, r_cl == 2, r_cl == 3, r_cl == 4])

    frames_big_df = pd.DataFrame(
        frame_rows,
        columns=['audio_num', 'frame_num', 'neutral', 'glass', 'gunshot', 'scream'],
    )

    # Predicted rows that do not match the validation row with the same index label in every column.
    diff = frames_big_df[~frames_big_df.isin(validation).all(1)]
    res = (len(validation) - len(diff)) / len(validation)

    # snr_num * 5 is stored in the 'SNR' column (presumably dB — confirm against the data set).
    result_df.loc[len(result_df)] = [snr_num * 5, round(res, 2)]
    print(result_df)

In [7]:
# Display the table filled in by the detection loop: one row per SNR level with its 'concurrency' score.
result_df

Unnamed: 0,SNR,concurrency
0,30,0.93
1,25,0.92
2,20,0.92
3,15,0.91
4,10,0.87
5,5,0.57
