# Data Preparation

In [1]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline

import librosa
import librosa.display
import IPython.display

import os
import sys
import logging
import progressbar

In [2]:
# Wrap stderr before logging is configured, so that the handler installed by
# basicConfig below is created after the wrapping.
# NOTE(review): presumably this keeps log lines from corrupting the progress
# bars used later in the notebook - confirm against the progressbar docs.
progressbar.streams.wrap_stderr()
# Root logger at INFO: the logger.debug(...) calls in this notebook are hidden.
logging.basicConfig(level=logging.INFO)
# Notebook-wide logger shared by every class defined below.
logger = logging.getLogger(__name__)

INFO:__main__:Number of features: 80
INFO:__main__:Array read from file datasetArray80.gz
INFO:__main__:Neural network of depth 3
INFO:tensorflow:Restoring parameters from models/2017-11-24-03:49:34-0.905953/session.ckpt
INFO:__main__:Accuracy: 0.913870
100% (3597 of 3597) |#####################| Elapsed Time: 0:00:00 Time: 0:00:00


In [3]:
class FeatureExtraction:
    """Compute spectral features for a single audio file.

    Typical call order is ``loadFile`` -> ``melspectrogram`` -> ``extractmfcc``
    -> ``extractrmse``; each step stores its result on the instance and the
    later steps read what the earlier ones produced.

    Attributes:
        filename: path of the audio file.
        n_mels: number of mel bands for the mel spectrogram.
        y, sr: audio samples and sampling rate, set by ``loadFile``.
        S, log_S: mel power spectrogram and its log-scaled version.
        mfcc, delta_mfcc, delta2_mfcc: MFCCs and their first/second deltas.
        M: the three MFCC matrices stacked vertically.
        rmse: frame-wise RMS energy.
    """

    def __init__(self, filename, n_mels=128):
        """Validate ``filename`` and initialise all feature slots to ``None``.

        Args:
            filename: path to an existing audio file.
            n_mels: number of mel bands used by ``melspectrogram``.

        Raises:
            SystemExit: if ``filename`` is not an existing regular file
                (an error is logged first).
        """
        # Plain check instead of the former bare ``raise`` / bare ``except:``
        # pair, which only worked because the RuntimeError from the bare raise
        # was swallowed together with every other exception.
        if not os.path.isfile(filename):
            logger.error("%s does not exists or is not a file", filename)
            sys.exit()
        self.filename = filename

        self.n_mels = n_mels
        self.y = None
        self.sr = None
        self.S = None
        self.log_S = None
        self.mfcc = None
        self.delta_mfcc = None
        self.delta2_mfcc = None
        self.M = None
        self.rmse = None

    def loadFile(self):
        """Load the audio file into ``self.y`` (samples) and ``self.sr`` (rate)."""
        self.y, self.sr = librosa.load(self.filename)
        logger.debug('File loaded %s', self.filename)

    def melspectrogram(self):
        """Compute the mel power spectrogram and its log-scaled version.

        Requires ``loadFile`` to have been called.
        """
        # The audio is passed by keyword: later librosa releases reject it as
        # a positional argument.
        self.S = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_mels=self.n_mels)
        # Keep using the original API when it exists; fall back to its
        # successor ``power_to_db`` on librosa releases that dropped it.
        if hasattr(librosa, 'logamplitude'):
            self.log_S = librosa.logamplitude(self.S, ref_power=np.max)
        else:
            self.log_S = librosa.power_to_db(self.S, ref=np.max)

    def plotmelspectrogram(self):
        """Plot ``self.log_S`` on a mel frequency axis with a dB colour bar."""
        plt.figure(figsize=(12, 4))
        librosa.display.specshow(self.log_S, sr=self.sr, x_axis='time', y_axis='mel')
        plt.title('mel Power Spectrogram')
        plt.colorbar(format='%+02.0f dB')
        plt.tight_layout()

    def extractmfcc(self, n_mfcc=13):
        """Compute MFCCs plus first and second order deltas from ``self.log_S``.

        Args:
            n_mfcc: number of MFCC coefficients to keep.

        ``self.M`` receives the three matrices stacked along the first axis.
        """
        self.mfcc = librosa.feature.mfcc(S=self.log_S, n_mfcc=n_mfcc)
        self.delta_mfcc = librosa.feature.delta(self.mfcc)
        self.delta2_mfcc = librosa.feature.delta(self.mfcc, order=2)
        self.M = np.vstack([self.mfcc, self.delta_mfcc, self.delta2_mfcc])

    def plotmfcc(self):
        """Plot the MFCCs and both delta matrices as three stacked panels."""
        plt.figure(figsize=(12, 6))
        plt.subplot(3, 1, 1)
        librosa.display.specshow(self.mfcc)
        plt.ylabel('MFCC')
        plt.colorbar()

        plt.subplot(3, 1, 2)
        librosa.display.specshow(self.delta_mfcc)
        # Raw strings: "\D" is not a valid escape sequence in a normal string
        # literal and triggers a warning on recent Python versions.
        plt.ylabel(r'MFCC-$\Delta$')
        plt.colorbar()

        plt.subplot(3, 1, 3)
        librosa.display.specshow(self.delta2_mfcc, sr=self.sr, x_axis='time')
        plt.ylabel(r'MFCC-$\Delta^2$')
        plt.colorbar()

        plt.tight_layout()

    def extractrmse(self):
        """Compute frame-wise RMS energy of ``self.y`` into ``self.rmse``."""
        # ``librosa.feature.rmse`` was renamed to ``rms`` in later releases;
        # prefer the original name so behaviour is unchanged where it exists.
        rmsFunction = getattr(librosa.feature, 'rmse', None)
        if rmsFunction is None:
            rmsFunction = librosa.feature.rms
        self.rmse = rmsFunction(y=self.y)

In [4]:
class Dataset:
    """Feature matrix ``X`` and one-hot label matrix ``Y`` for the audio dataset.

    The label file has one ``<audio filename> <label>`` pair per line, where
    the label is ``STUTTER`` (encoded ``[0, 1]``) or ``NORMAL`` (``[1, 0]``).
    The matrices can be built from the audio files with ``build`` and cached
    to / restored from a text file with ``writeToFile`` / ``readFromFile``.
    """

    def __init__(self, datasetDir, datasetLabelFilename, datasetArrayFilename):
        """Validate the input paths and create empty ``X`` / ``Y`` matrices.

        Args:
            datasetDir: directory containing the audio files.
            datasetLabelFilename: path of the label file.
            datasetArrayFilename: default path used to cache ``X`` and ``Y``.

        Raises:
            SystemExit: if ``datasetDir`` is not a directory or
                ``datasetLabelFilename`` is not a file (an error is logged).
        """
        # Plain checks replace the former bare ``raise`` / bare ``except:``
        # pairs, which relied on swallowing a RuntimeError.
        if not os.path.isdir(datasetDir):
            logger.error("%s does not exists or is not a directory", datasetDir)
            sys.exit()
        self.datasetDir = datasetDir
        logger.debug("Dataset Directory: %s", self.datasetDir)

        if not os.path.isfile(datasetLabelFilename):
            logger.error("%s does not exists or is not a file", datasetLabelFilename)
            sys.exit()
        self.datasetLabelFilename = datasetLabelFilename
        logger.debug("Dataset labels filename: %s", self.datasetLabelFilename)

        self.datasetArrayFilename = datasetArrayFilename
        logger.debug("Dataset array filename: %s", self.datasetArrayFilename)

        # 13 MFCC rows x 3 matrices x (mean, var) + RMS (mean, var) = 80,
        # matching the default n_mfcc of FeatureExtraction.extractmfcc.
        self.n_features = 80
        logger.info("Number of features: %s", self.n_features)
        self.X = np.empty(shape=(0, self.n_features))
        self.Y = np.empty(shape=(0, 2))

    @staticmethod
    def _featureVector(features):
        """Flatten extracted features into one list of per-row means and variances."""
        vector = []
        for matrix in (features.mfcc, features.delta_mfcc, features.delta2_mfcc):
            for coefficient in matrix:
                vector.append(np.mean(coefficient))
                vector.append(np.var(coefficient))
        vector.append(np.mean(features.rmse))
        vector.append(np.var(features.rmse))
        return vector

    def build(self):
        """Extract features for every labelled file and append them to ``X`` / ``Y``.

        Files whose feature extraction raises ``ValueError`` are skipped with
        a warning. Blank lines in the label file are ignored.

        Raises:
            SystemExit: on a label other than ``STUTTER`` / ``NORMAL``.
        """
        # Count lines for the progress bar; the handle is closed right away.
        with open(self.datasetLabelFilename, 'r') as datasetLabelFile:
            num_lines = sum(1 for _ in datasetLabelFile)

        # Rows are collected in lists and stacked once at the end; stacking
        # inside the loop copies the whole matrix for every file.
        featureRows = []
        labelRows = []
        filesProcessed = 0
        with open(self.datasetLabelFilename, 'r') as datasetLabelFile:
            pbar = progressbar.ProgressBar(redirect_stdout=True)
            for line in pbar(datasetLabelFile, max_value=num_lines):
                lineSplit = line.split()
                if not lineSplit:
                    # e.g. a trailing empty line at the end of the label file
                    continue
                audiofilename = lineSplit[0]
                label = lineSplit[1]
                try:
                    features = FeatureExtraction(os.path.join(self.datasetDir, audiofilename))
                    features.loadFile()
                    features.melspectrogram()
                    features.extractmfcc()
                    features.extractrmse()
                except ValueError:
                    logger.warning("Error extracting features from file %s", audiofilename)
                    continue

                featureVector = self._featureVector(features)

                if label == "STUTTER":
                    oneHot = [0, 1]
                elif label == "NORMAL":
                    oneHot = [1, 0]
                else:
                    logger.error("Unexpected label: %s", label)
                    sys.exit()

                featureRows.append(featureVector)
                labelRows.append(oneHot)
                filesProcessed += 1

        if featureRows:
            self.X = np.vstack((self.X, featureRows))
            self.Y = np.vstack((self.Y, labelRows))
        logger.info("Total files processed: %d", filesProcessed)

    def writeToFile(self, filename=None):
        """Save ``X`` and ``Y`` side by side as text, replacing any existing file.

        Args:
            filename: target path; defaults to ``self.datasetArrayFilename``.
        """
        if filename is None:
            filename = self.datasetArrayFilename

        if os.path.exists(filename):
            os.remove(filename)
        np.savetxt(filename, np.hstack((self.X, self.Y)))
        logger.info("Array stored in file %s", filename)

    def readFromFile(self, filename=None):
        """Load ``X`` and ``Y`` from a file written by ``writeToFile``.

        Args:
            filename: source path; defaults to ``self.datasetArrayFilename``.

        Raises:
            SystemExit: if the file does not exist (an error is logged).
        """
        if filename is None:
            filename = self.datasetArrayFilename

        if not os.path.isfile(filename):
            logger.error("%s does not exists or is not a file", filename)
            sys.exit()
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below still works.
        matrix = np.loadtxt(filename, ndmin=2)
        self.X = matrix[:, 0:self.n_features]
        self.Y = matrix[:, self.n_features:]
        logger.info("Array read from file %s", filename)

In [5]:
# Reuse the cached feature matrix when it is already on disk; otherwise
# extract the features from the audio files and cache the result.
dataset = Dataset('dataset', 'datasetLabels.txt', 'datasetArray80.gz')
if os.path.isfile(dataset.datasetArrayFilename):
    dataset.readFromFile()
else:
    dataset.build()
    dataset.writeToFile()

# TensorFlow binary classification

In [6]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
import datetime
import os

In [7]:
class NeuralNetwork:
    """Fully connected ReLU network with a two-class softmax cross-entropy loss.

    The graph is built in the current default TensorFlow graph when the object
    is constructed. ``train`` fits and saves the model, ``loadFromFile``
    restores a saved checkpoint and logs its accuracy on the test data.
    """

    def __init__(self, X_train, Y_train, X_test, Y_test):
        """Store the data, set the hyper-parameters and build the graph.

        Args:
            X_train, X_test: 2-D feature arrays (samples x features).
            Y_train, Y_test: one-hot label arrays with ``n_classes`` columns.
        """
        # Data
        self.X_train = X_train
        self.Y_train = Y_train
        self.X_test = X_test
        self.Y_test = Y_test

        # Learning Parameters
        self.learning_rate = 0.001
        self.training_epochs = 3500
        self.batch_size = 100
        self.display_step = 100

        # Model Parameters
        self.n_hidden = [10, 10, 10]
        self.hiddenLayers = len(self.n_hidden)
        # Input width is taken from the training data itself instead of the
        # notebook-level ``dataset`` global, so the class does not depend on
        # a variable defined in another cell.
        self.n_input = np.shape(X_train)[1]
        self.n_classes = 2

        logger.info("Neural network of depth %d", self.hiddenLayers)
        for i, width in enumerate(self.n_hidden):
            logger.debug("Depth of layer %d is %d", (i + 1), width)

        self.x = tf.placeholder("float", [None, self.n_input])
        self.y = tf.placeholder("float", [None, self.n_classes])
        self.layer = None
        self.weights = None
        self.biases = None
        # Model
        self.model = self.network(self.x)
        self.save_path = None

        # Loss function and optimizer
        self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.model, labels=self.y))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost)

        # Initialize the variables
        self.init = tf.global_variables_initializer()

    def network(self, x):
        """Build the hidden layers and the linear output layer on top of ``x``.

        Populates ``self.weights``, ``self.biases`` and ``self.layer`` (one
        entry per hidden layer plus one for the output) and returns the
        output (logits) tensor.
        """
        self.layer = []
        self.weights = []
        self.biases = []

        # Variables are created in the order weight, bias, layer by layer.
        # NOTE(review): existing checkpoints presumably identify variables by
        # their auto-generated names, which follow creation order - keep this
        # order unchanged.
        previousWidth = self.n_input
        previousOutput = x
        for width in self.n_hidden:
            self.weights.append(tf.Variable(tf.random_normal([previousWidth, width])))
            self.biases.append(tf.Variable(tf.random_normal([width])))
            previousOutput = tf.nn.relu(tf.add(tf.matmul(previousOutput, self.weights[-1]), self.biases[-1]))
            self.layer.append(previousOutput)
            previousWidth = width

        # Output layer
        self.weights.append(tf.Variable(tf.random_normal([previousWidth, self.n_classes])))
        self.biases.append(tf.Variable(tf.random_normal([self.n_classes])))
        self.layer.append(tf.matmul(previousOutput, self.weights[-1]) + self.biases[-1])

        return self.layer[-1]

    def train(self):
        """Train the network, log its test accuracy and save it under ``models/``.

        Side effects:
            * sets the module-level ``result`` to the predicted class index
              of every test sample;
            * writes ``session.ckpt`` and ``details.txt`` into a new directory
              ``models/<timestamp>-<accuracy>`` and stores the checkpoint path
              in ``self.save_path``.
        """
        global result

        logger.info("Training the neural network")
        saver = tf.train.Saver()

        # The data is never reshuffled, so the batches are identical for every
        # epoch and can be split once. At least one batch is used so that a
        # training set smaller than ``batch_size`` does not break array_split.
        total_batch = max(1, len(self.X_train) // self.batch_size)
        X_batches = np.array_split(self.X_train, total_batch)
        Y_batches = np.array_split(self.Y_train, total_batch)

        with tf.Session() as sess:
            sess.run(self.init)
            pbar = progressbar.ProgressBar(redirect_stdout=True)
            for epoch in pbar(range(self.training_epochs)):
                avg_cost = 0

                for batch_x, batch_y in zip(X_batches, Y_batches):
                    # Run optimization op (backprop) and cost op (to get loss value)
                    _, c = sess.run([self.optimizer, self.cost], feed_dict={self.x: batch_x, self.y: batch_y})

                    # Compute average loss
                    avg_cost += c / total_batch

                # Display logs per epoch step
                if epoch % self.display_step == 0:
                    logger.debug("cost = %.9f", avg_cost)
            logger.info("Optimization Finished!")

            evalAccuracy = self.getAccuracy()

            # Predictions are computed on this instance's own test data, not
            # on whatever X_test / Y_test happen to exist at notebook level.
            result = tf.argmax(self.model, 1).eval({self.x: self.X_test, self.y: self.Y_test})

            if not os.path.isdir("models"):
                os.makedirs("models")
            timestamp = '{:%Y-%m-%d-%H:%M:%S}'.format(datetime.datetime.now()) + '-' + str(evalAccuracy)
            modelDir = os.path.join("models", timestamp)
            os.makedirs(modelDir)
            modelfilename = os.path.join(modelDir, 'session.ckpt')
            self.save_path = saver.save(sess, modelfilename)

            # Record the hyper-parameters next to the checkpoint.
            recorded = ("learning_rate", "training_epochs", "batch_size", "display_step",
                        "n_hidden", "hiddenLayers", "n_input", "n_classes")
            with open(os.path.join(modelDir, 'details.txt'), 'w') as details:
                for name in recorded:
                    details.write(name + " = " + str(getattr(self, name)) + "\n")

            logger.info("Model saved in file: %s", self.save_path)

    def getModelPath(self):
        """Return the checkpoint path of the last ``train`` call, or ``None``."""
        return self.save_path

    def getAccuracy(self):
        """Evaluate, log and return the accuracy on the stored test data.

        Called by ``train`` and ``loadFromFile`` from inside their
        ``with tf.Session()`` blocks.
        """
        # Test model
        correct_prediction = tf.equal(tf.argmax(self.model, 1), tf.argmax(self.y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        evalAccuracy = accuracy.eval({self.x: self.X_test, self.y: self.Y_test})
        logger.info("Accuracy: %f", evalAccuracy)
        return evalAccuracy

    def loadFromFile(self, filename):
        """Restore the variables from checkpoint ``filename`` and log the accuracy."""
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, filename)
            self.getAccuracy()
            

In [8]:
# Fixed seed so the train/test partition, and every accuracy figure derived
# from it, is the same on each run of the notebook.
# NOTE(review): a checkpoint restored later was presumably trained on a
# different split, so its accuracy on this test set may include samples it was
# trained on - confirm before quoting the number.
SPLIT_SEED = 42
X_train, X_test, Y_train, Y_test = train_test_split(dataset.X, dataset.Y, random_state=SPLIT_SEED)

In [9]:
# Clear the default graph before the network is (re)built in it.
tf.reset_default_graph()
nn = NeuralNetwork(X_train, Y_train, X_test, Y_test)

# Restore a previously trained model and log its accuracy on the test split.
# To train from scratch instead, call nn.train() in place of loadFromFile.
checkpointPath = 'models/2017-11-24-03:49:34-0.905953/session.ckpt'
nn.loadFromFile(checkpointPath)

INFO:tensorflow:Restoring parameters from models/2017-11-24-03:49:34-0.905953/session.ckpt


# Using the NN model for classification

In [10]:
import librosa

In [11]:
# Recording to be segmented in the next cell.
audioPath = 'wav/release1/M_1106_25y0m_1.wav'
y, sr = librosa.load(audioPath)

In [12]:
# Sliding-window segmentation; the /1000 conversions below imply that both
# values are in milliseconds.
segmentLength = 250
segmentHop = 50
samplesPerSegment = int(segmentLength * sr / 1000)
samplesToSkipPerHop = int(segmentHop * sr / 1000)
upperLimit = int(len(y) - samplesPerSegment)

# `end` starts at 0 so the tail slice after the loop is defined even when the
# recording is shorter than one segment and the loop body never runs
# (previously this raised a NameError).
end = 0

pbar = progressbar.ProgressBar()
# NOTE(review): a window starting exactly at upperLimit is excluded by range();
# confirm whether the last full window should be included.
for start in pbar(range(0, upperLimit, samplesToSkipPerHop)):
    end = start + samplesPerSegment
    # Current window; nothing consumes it yet.
    audio = y[start:end]
# Samples left over after the last window that was visited.
audio = y[end:]