In [87]:
import os
import librosa
import _pickle as cPickle
from scipy.io.wavfile import read
from pathlib import Path
import warnings
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc
from sklearn import metrics
from keras.utils import to_categorical

In [88]:
train_file ="C:\\Users\\B43E\\python\\py\\data\\f_list\\wav_list_train.txt"    
test_file ="C:\\Users\\B43E\\python\\py\\data\\f_list\\wav_list_test.txt" 
def calculate_delta(array):
    """Calculate and returns the delta of given feature vector matrix"""

    rows,cols = array.shape
    deltas = np.zeros((rows,20))
    N = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= N:
            if i-j < 0:
                first = 0
            else:
                first = i-j
            if i+j > rows -1:
                second = rows -1
            else:
                second = i+j
            index.append((second,first))
            j+=1
        deltas[i] = ( array[index[0][0]]-array[index[0][1]] + (2 * (array[index[1][0]]-array[index[1][1]])) ) / 10
    return deltas

def extract_features(audio,rate):
    """extract 20 dim mfcc features from an audio, performs CMS and combines 
    delta to make it 40 dim feature vector"""    
    
    mfcc_feat = mfcc.mfcc(audio,rate, 0.025, 0.01,20,appendEnergy = True)
    
    mfcc_feat = preprocessing.scale(mfcc_feat)
    delta = calculate_delta(mfcc_feat)
    combined = np.hstack((mfcc_feat,delta)) 
    return combined
def get_features(file_path): 
    file_paths = open(file_path,'r')
    features_normalize = []
    total_feature = []
    features = np.asarray(())
    for path in file_paths:    
        path = path.strip()
        file_name=os.path.basename(path)
        audio, sr1 = librosa.load(path, sr=44100)
        data=librosa.resample(audio, sr1, 16000)
        vector   = extract_features(data,16000)
        if features.size == 0:
            features = vector
       
        else:
            features = np.vstack((features, vector)) 

        total_feature.append(features)
    return total_feature

In [89]:
def extract_label(file_path): 
    file_paths = open(file_path,'r')
    features_normalize = []
    total_feature = []
    labels = []
    features = np.asarray(())
    for path in file_paths:    
        path = path.strip()
        file_name=os.path.basename(path)
        audio, sr1 = librosa.load(path, sr=44100)
        data=librosa.resample(audio, sr1, 16000)
        vector   = extract_features(data,16000)

        if features.size == 0:
            features = vector

        
        else:
            features = np.vstack((features, vector))


        file_name=os.path.basename(path)

        split_filename=file_name.split('_')

        molecule_name=split_filename[0]

        for r in range(features.shape[0]):

            labels.append(molecule_name)
       
    return labels

In [91]:

# Extract features from the training data 
extracted_features_train = get_features(train_file) 
final_features_train = np.vstack(extracted_features_train)
# Extract features from the testing data 
extracted_features_test = get_features(test_file) 
final_features_test = np.vstack(extracted_features_test) #تحمع اكثر من اراي بشكل عامودي
# Extract labels from the training data 
train_label_value = extract_label(train_file)
train_final_label = np.array(train_label_value)
# Extract labels from the testing data 
test_label_value = extract_label(test_file)
test_final_label = np.array(test_label_value)
#print(test_final_label)
#print(test_final_label.shape)
x_train = final_features_train
x_test = final_features_test
#print(x_train)
#print(x_test)
y_train = train_final_label 
y_test =  test_final_label
#print(y_train)
#print(y_test)


In [93]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test  = label_encoder.fit_transform(y_test)
#print(y_train)
#print(y_test)
y_train_hot = to_categorical(y_train)
y_test_hot = to_categorical(y_test)
#print(y_train_hot)
#print(y_test_hot)

In [94]:
import keras
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(16, input_dim=40, activation='sigmoid'))
model.add(Dense(12, activation='sigmoid'))
# Adding the fully connected layer (outputlayer)
model.add(Dense(2, activation='softmax'))
# Compiling the model
model.compile(loss=keras.losses.categorical_crossentropy,optimizer=keras.optimizers.Adadelta(),metrics=['accuracy'])
#model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
#model.compile(optimizer=‘adam’,loss=‘binary_crossentropy’,metrics=[‘accuracy’])
model.fit(x_train, y_train_hot, batch_size=10, epochs=10, verbose=1, validation_data=(x_test, y_test_hot))

Train on 36817 samples, validate on 36817 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1579790c448>

In [82]:
y_pred = model.predict(x_test)
print(y_pred)

[[0.5368278  0.46317223]
 [0.5375644  0.4624356 ]
 [0.55186826 0.44813174]
 ...
 [0.06128747 0.9387125 ]
 [0.04628441 0.9537156 ]
 [0.04296673 0.9570333 ]]


In [84]:
pred = list()
for i in range(len(y_pred)):
    pred.append(np.argmax(y_pred[i]))

print(pred)

[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 

In [85]:
y = list()
for i in range(len(y_test_hot)):
    y.append(np.argmax(y_test_hot[i]))

print(y)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [61]:
from sklearn.metrics import accuracy_score
a = accuracy_score(pred,y)
print('Accuracy is:', a*100)


Accuracy is: 64.27096119780373
