In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa as lr
import librosa.display
from IPython.display import Audio
import warnings
import math
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, LSTM
from keras.models import model_from_json
from keras.utils import to_categorical
import noisereduce as nr
%matplotlib inline

import os

Using TensorFlow backend.


In [2]:
# Suppress every warning category for the rest of the session.
warnings.filterwarnings("ignore")
# Sample rate handed to lr.load (as the target rate) and to the MFCC extractor.
SR = 16000
# Number of MFCCs requested from librosa.feature.mfcc; also the second axis
# of the array that is later reshaped for the model.
n_mfcc = 10

In [3]:
LENGTH = 16000  # samples in the sliding window whose absolute sum is scored
EXTRA_LENGTH = 5000  # samples of context kept on each side of that window
def significant_part(data):
    """Crop ``data`` around the stretch with the largest absolute-amplitude sum.

    The signal is zero-padded by ``EXTRA_LENGTH`` on both sides (and further on
    the right when it is too short), then a ``LENGTH``-sample window slides
    over it one sample at a time. The crop belonging to the highest-scoring
    window is returned; ties keep the earliest one.

    Args:
        data: Array-like audio samples exposing ``.copy()``; flattened by the
            concatenation.

    Returns:
        numpy.ndarray: ``LENGTH + 2 * EXTRA_LENGTH`` samples taken from the
        padded signal.
    """
    crop_size = LENGTH + 2 * EXTRA_LENGTH
    margin = np.zeros((EXTRA_LENGTH,), dtype=float)

    padded = np.concatenate((margin, data.copy(), margin), axis=None)
    if len(padded) <= crop_size:
        # Short clips are extended to crop_size + 3 samples so that the
        # sliding loop below still gets a few iterations.
        shortfall = LENGTH - len(padded) + 3 + 2 * EXTRA_LENGTH
        padded = np.concatenate((padded, np.zeros((shortfall,), dtype=float)), axis=None)

    magnitude = np.abs(padded)

    # Score of the first window, accumulated sample by sample on purpose:
    # the order of the additions determines the floating-point result.
    best_sum = 0
    for value in magnitude[EXTRA_LENGTH:LENGTH + EXTRA_LENGTH]:
        best_sum += value

    best_start = 0
    running = best_sum
    for right in range(LENGTH + EXTRA_LENGTH, len(padded) - EXTRA_LENGTH):
        running += magnitude[right]
        running -= magnitude[right - LENGTH]

        if running > best_sum:
            best_sum = running
            # NOTE(review): `running` now covers [right - LENGTH + 1, right],
            # while the crop is centred on [right - LENGTH, right - 1], i.e.
            # one sample earlier. Kept as is so preprocessing stays unchanged.
            best_start = right - LENGTH - EXTRA_LENGTH

    return padded[best_start:best_start + crop_size]

In [4]:
def load_test_data(path):
    """Load every file found under ``path`` as a cropped mono waveform.

    The directory tree is walked recursively. Each file is read with
    ``lr.load`` at sample rate ``SR`` and passed through ``significant_part``.

    Args:
        path: Root directory to walk.

    Returns:
        tuple: ``(voices, files)`` -- the processed waveforms and the matching
        bare file names (no directory part), in walk order.
    """
    voices = []
    files = []

    for root, _subdirs, names in os.walk(path):
        for name in names:
            signal, _rate = lr.load(os.path.join(root, name), mono=True, sr=SR)
            voices.append(significant_part(signal))
            files.append(name)

    return voices, files

In [5]:
def get_mfccs(voices):
    """Compute one MFCC matrix per waveform.

    Args:
        voices: Iterable of audio signals, each passed to
            ``librosa.feature.mfcc`` together with sample rate ``SR``.

    Returns:
        list: The ``librosa.feature.mfcc`` result for every entry of
        ``voices``, in order, each requested with ``n_mfcc`` coefficients.
    """
    # The waveform is passed by keyword: recent librosa releases (0.10+)
    # declare ``y`` keyword-only, so a positional signal raises TypeError
    # there, while ``y=`` is accepted by older releases as well.
    return [librosa.feature.mfcc(y=voice, sr=SR, n_mfcc=n_mfcc) for voice in voices]

In [6]:
# Rebuild the network from its JSON architecture file, then attach the
# trained weights. The context manager closes the handle even if read()
# raises, which the manual open()/close() pair did not guarantee.
with open('result_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)
model.load_weights("result_model_weights.h5")

In [7]:
# Walk ./Tests recursively and load every file found there; each clip is
# cropped/padded by significant_part inside load_test_data.
test_voices, test_filenames = load_test_data("./Tests")

In [8]:
# One MFCC result per loaded clip, in the same order as test_voices.
test_mfccs = get_mfccs(test_voices)

In [9]:
# Size of the second MFCC axis, read from the first clip only; this raises
# IndexError when no files were loaded.
# NOTE(review): despite the name this is not a maximum over clips -- it
# presumably relies on every clip having the same length after
# significant_part; confirm.
max_mfcc_length = test_mfccs[0].shape[1]

In [10]:
# Stack the per-clip MFCC results into a single array.
# NOTE(review): assumes all MFCC matrices share one shape -- confirm.
X_test = np.asarray(test_mfccs)

In [11]:
# Reshape to (clips, n_mfcc, max_mfcc_length, 1), i.e. add a trailing
# singleton axis.
# NOTE(review): presumably the channel axis the loaded model expects --
# confirm against result_model.json.
X_test = X_test.reshape(X_test.shape[0], n_mfcc, max_mfcc_length, 1)

In [12]:
# Run the loaded model on the whole test batch.
# NOTE(review): presumably yields one row of class scores per clip -- confirm.
predictions = model.predict(X_test)

In [13]:
def normilize(prediction):
    """Rescale the values of ``prediction`` so that they add up to one.

    Args:
        prediction: Iterable of numeric scores (iterated twice).

    Returns:
        list: Each score converted with ``float`` and divided by the sum of
        all scores, in the original order. A zero total is not guarded
        against.
    """
    total = sum(prediction)
    return [float(score) / total for score in prediction]

In [14]:
# Rescale every prediction row so that its entries sum to one.
normilized_predictions = [normilize(row) for row in predictions]

In [15]:
# Drop the file extension with splitext instead of a fixed four-character
# slice: extensions of any length are handled, names without an extension
# are left intact, and re-running this cell no longer eats four more
# characters from names that were already stripped.
test_filenames = [os.path.splitext(x)[0] for x in test_filenames]

In [16]:
# Assemble the submission table: column 0 holds the file stem, columns 1-5
# hold the normalized scores of the first five classes. The prediction frame
# is built once and reused for every class column.
class_scores = pd.DataFrame(normilized_predictions)

df_sub = pd.DataFrame()
df_sub[0] = pd.DataFrame(test_filenames)[0]
for class_index in range(5):
    df_sub[class_index + 1] = class_scores[class_index]

# Written without a header row or an index column.
df_sub.to_csv("10.csv", header=False, index=False)