# Data Preparation

In [1]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.style as ms
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later removed the old names; use whichever name this installation provides.
ms.use('seaborn-v0_8-muted' if 'seaborn-v0_8-muted' in ms.available else 'seaborn-muted')
%matplotlib inline

import librosa
import librosa.display
import IPython.display

import os
import sys

In [2]:
class FeatureExtraction:
    """Extract spectral features from a single audio file with librosa.

    The methods are meant to be called in order: ``loadFile`` ->
    ``melspectrogram`` -> ``extractmfcc`` (and ``extractrmse`` any time after
    ``loadFile``). Each step stores its result on the instance and later
    steps read those attributes.

    Attributes:
        filename: path of the audio file to analyse.
        n_mels: number of mel bands requested from ``melspectrogram``.
        y, sr: values returned by ``librosa.load`` (set by ``loadFile``).
        S, log_S: mel spectrogram and its dB-scaled version.
        mfcc, delta_mfcc, delta2_mfcc: MFCCs and their first/second-order deltas.
        M: vertical stack of ``mfcc``, ``delta_mfcc`` and ``delta2_mfcc``.
        rmse: output of librosa's RMS-energy feature.
    """

    def __init__(self, filename, n_mels=128):
        """Validate ``filename`` and initialise every feature slot to ``None``.

        Exits via ``sys.exit`` (``SystemExit``) when ``filename`` is not an
        existing regular file.
        """
        # os.path.isfile() is already False for a missing path, so a single
        # check covers both "does not exist" and "is not a file".
        if not os.path.isfile(filename):
            sys.exit(filename + " does not exists or is not a file")
        self.filename = filename
        self.n_mels = n_mels
        self.y = None
        self.sr = None
        self.S = None
        self.log_S = None
        self.mfcc = None
        self.delta_mfcc = None
        self.delta2_mfcc = None
        self.M = None
        self.rmse = None

    def loadFile(self):
        """Load the audio file into ``self.y`` / ``self.sr`` via ``librosa.load``."""
        self.y, self.sr = librosa.load(self.filename)

    def melspectrogram(self):
        """Compute the mel spectrogram (``self.S``) and its dB version (``self.log_S``).

        Requires ``loadFile`` to have been called first.
        """
        # Pass the audio by keyword: newer librosa releases no longer accept
        # it positionally, while older ones accept the keyword as well.
        self.S = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_mels=self.n_mels)
        # librosa.logamplitude was replaced by power_to_db (ref_power -> ref);
        # keep the legacy call only as a fallback for very old installations.
        if hasattr(librosa, 'power_to_db'):
            self.log_S = librosa.power_to_db(self.S, ref=np.max)
        else:
            self.log_S = librosa.logamplitude(self.S, ref_power=np.max)

    def plotmelspectrogram(self):
        """Draw ``self.log_S`` as a mel-scaled spectrogram with a dB colorbar."""
        plt.figure(figsize=(12, 4))
        librosa.display.specshow(self.log_S, sr=self.sr, x_axis='time', y_axis='mel')
        plt.title('mel Power Spectrogram')
        plt.colorbar(format='%+02.0f dB')
        plt.tight_layout()

    def extractmfcc(self, n_mfcc=13):
        """Compute MFCCs from ``self.log_S`` plus their deltas, and stack them in ``self.M``.

        Args:
            n_mfcc: number of MFCC coefficients to request from librosa.

        Requires ``melspectrogram`` to have been called first.
        """
        self.mfcc = librosa.feature.mfcc(S=self.log_S, n_mfcc=n_mfcc)
        self.delta_mfcc = librosa.feature.delta(self.mfcc)
        self.delta2_mfcc = librosa.feature.delta(self.mfcc, order=2)
        self.M = np.vstack([self.mfcc, self.delta_mfcc, self.delta2_mfcc])

    def plotmfcc(self):
        """Plot MFCC, delta and delta-delta as three stacked panels."""
        plt.figure(figsize=(12, 6))
        plt.subplot(3, 1, 1)
        librosa.display.specshow(self.mfcc)
        plt.ylabel('MFCC')
        plt.colorbar()

        plt.subplot(3, 1, 2)
        librosa.display.specshow(self.delta_mfcc)
        # Raw strings: '\D' is not a valid Python escape sequence.
        plt.ylabel(r'MFCC-$\Delta$')
        plt.colorbar()

        plt.subplot(3, 1, 3)
        librosa.display.specshow(self.delta2_mfcc, sr=self.sr, x_axis='time')
        plt.ylabel(r'MFCC-$\Delta^2$')
        plt.colorbar()

        plt.tight_layout()

    def extractrmse(self):
        """Compute the RMS-energy feature of ``self.y`` into ``self.rmse``.

        Requires ``loadFile`` to have been called first.
        """
        # librosa renamed feature.rmse to feature.rms; prefer the new name and
        # fall back to the old one when it is not available.
        rms = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
        self.rmse = rms(y=self.y)

In [3]:
class Dataset:
    """Build, cache and reload the feature matrix used for classification.

    Each audio file listed in the label file becomes one row of ``X``
    (mean and variance of every MFCC row, followed by the mean and variance
    of the RMS energy) and one row of ``Y`` (one-hot label:
    ``[1, 0]`` for ``NORMAL``, ``[0, 1]`` for ``STUTTER``).

    Attributes:
        datasetDir: directory that contains the audio files.
        datasetLabelFilename: text file with ``<audio filename> <label>`` lines.
        datasetArrayFilename: default path for the cached ``X``/``Y`` array.
        n_features: number of columns in ``X``.
        X, Y: feature matrix and one-hot label matrix.
    """

    def __init__(self, datasetDir, datasetLabelFilename, datasetArrayFilename):
        """Validate the input paths and create empty ``X`` / ``Y`` arrays.

        Exits via ``sys.exit`` when ``datasetDir`` is not an existing directory
        or ``datasetLabelFilename`` is not an existing file.
        """
        # isdir()/isfile() are already False for missing paths.
        if not os.path.isdir(datasetDir):
            sys.exit(datasetDir + " does not exists or is not a directory")
        if not os.path.isfile(datasetLabelFilename):
            sys.exit(datasetLabelFilename + " does not exists or is not a file")

        self.datasetDir = datasetDir
        self.datasetLabelFilename = datasetLabelFilename
        self.datasetArrayFilename = datasetArrayFilename

        # 13 MFCC rows x (mean, variance) + RMS energy x (mean, variance).
        self.n_features = 28
        self.X = np.empty(shape=(0, self.n_features))
        self.Y = np.empty(shape=(0, 2))

    def build(self):
        """Extract features for every labelled file and append them to ``X`` / ``Y``.

        Blank lines are skipped and lines without a label are reported and
        skipped. Files for which feature extraction raises ``ValueError`` are
        reported and skipped. An unknown label aborts via ``sys.exit`` before
        any audio processing is done for that line.
        """
        labelEncoding = {"STUTTER": [0, 1], "NORMAL": [1, 0]}
        # Collect rows in lists and stack once at the end; growing the arrays
        # with np.vstack on every file copies everything each time.
        featureRows = []
        labelRows = []
        filesProcessed = 0

        with open(self.datasetLabelFilename, 'r') as datasetLabelFile:
            for line in datasetLabelFile:
                lineSplit = line.split()
                if not lineSplit:
                    continue  # blank line
                if len(lineSplit) < 2:
                    print("[ERROR] Malformed label line: " + line.strip())
                    continue
                audiofilename = lineSplit[0]
                label = lineSplit[1]

                # Fail fast on a bad label instead of after the costly extraction.
                if label not in labelEncoding:
                    sys.exit("Unexpected label: " + label)

                try:
                    features = FeatureExtraction(os.path.join(self.datasetDir, audiofilename))
                    features.loadFile()
                    features.melspectrogram()
                    features.extractmfcc()
                    features.extractrmse()
                except ValueError:
                    print("[ERROR] Error in file " + audiofilename)
                    continue

                # Interleaved layout: mean_0, var_0, mean_1, var_1, ..., rms mean, rms var.
                featureVector = []
                for feature in features.mfcc:
                    featureVector.append(np.mean(feature))
                    featureVector.append(np.var(feature))
                featureVector.append(np.mean(features.rmse))
                featureVector.append(np.var(features.rmse))

                featureRows.append(featureVector)
                labelRows.append(labelEncoding[label])

                filesProcessed += 1
                if filesProcessed % 1000 == 0:
                    print("[INFO] Files processed:", filesProcessed)

        if featureRows:
            self.X = np.vstack((self.X, featureRows))
            self.Y = np.vstack((self.Y, labelRows))

        print("-----------------------------")
        print("[INFO] Total files processed:", filesProcessed)

    def writeToFile(self, filename=None):
        """Save ``X`` and ``Y`` side by side with ``np.savetxt``.

        Args:
            filename: destination path; defaults to ``datasetArrayFilename``.
        """
        if filename is None:
            filename = self.datasetArrayFilename
        if os.path.exists(filename):
            os.remove(filename)
        np.savetxt(filename, np.hstack((self.X, self.Y)))
        print("[INFO] Array stored in file", filename)

    def readFromFile(self, filename=None):
        """Load ``X`` and ``Y`` from a file written by ``writeToFile``.

        Args:
            filename: source path; defaults to ``datasetArrayFilename``.

        Exits via ``sys.exit`` when the file does not exist.
        """
        if filename is None:
            filename = self.datasetArrayFilename
        if not os.path.isfile(filename):
            sys.exit(filename + " does not exists or is not a file")
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slicing below works for any number of samples.
        matrix = np.loadtxt(filename, ndmin=2)
        self.X = matrix[:, 0:self.n_features]
        self.Y = matrix[:, self.n_features:]
        print("[INFO] Array read from file", filename)

In [4]:
# Reuse the cached feature array when it exists; otherwise extract the
# features from the audio files and cache them for the next run.
dataset = Dataset('dataset', 'datasetLabels.txt', 'datasetArray.gz')
if os.path.isfile(dataset.datasetArrayFilename):
    dataset.readFromFile()
else:
    dataset.build()
    dataset.writeToFile()

[INFO] Files processed: 1000
[INFO] Files processed: 2000
[ERROR] Error in file M_1103_20y0m_1:241344:241344.wav
[INFO] Files processed: 3000
[INFO] Files processed: 4000
[ERROR] Error in file M_1105_21y0m_1:831719:831719.wav
[INFO] Files processed: 5000
[INFO] Files processed: 6000
[INFO] Files processed: 7000
[INFO] Files processed: 8000
[INFO] Files processed: 9000
[INFO] Files processed: 10000
[INFO] Files processed: 11000
[INFO] Files processed: 12000
-----------------------------
[INFO] Total files processed: 12631
[INFO] Array stored in file datasetArray.gz


# TensorFlow binary classification

In [5]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

In [7]:
# Fixed seed so the train/test partition (and every metric derived from it)
# is the same after a kernel restart.
X_train, X_test, Y_train, Y_test = train_test_split(dataset.X, dataset.Y, random_state=42)

In [8]:
# --- Training hyper-parameters ---
learning_rate = 0.001    # step size handed to the optimizer
training_epochs = 800    # full passes over the training set
batch_size = 100         # target number of samples per mini-batch
display_step = 1         # print the average cost every N epochs

# --- Network dimensions ---
n_hidden_1 = 10          # units in the first hidden layer
n_hidden_2 = 10          # units in the second hidden layer
n_input = 28             # feature-vector length
n_classes = 2            # width of the one-hot label vector

# Graph inputs: a batch of feature vectors and the matching one-hot labels.
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

def network(x, weights, biases):
    """Two ReLU hidden layers followed by a linear output layer.

    Args:
        x: input batch.
        weights: mapping with the keys 'h1', 'h2' and 'out'.
        biases: mapping with the keys 'b1', 'b2' and 'out'.

    Returns:
        The un-activated output of the last layer (logits).
    """
    activation = x
    # Hidden layers: affine transform followed by ReLU.
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        pre_activation = tf.add(tf.matmul(activation, weights[weight_key]), biases[bias_key])
        activation = tf.nn.relu(pre_activation)

    # Output layer stays linear; the loss applies the softmax itself.
    return tf.add(tf.matmul(activation, weights['out']), biases['out'])

# Shapes of the trainable parameters, keyed the way network() looks them up.
weight_shapes = {
    'h1': [n_input, n_hidden_1],
    'h2': [n_hidden_1, n_hidden_2],
    'out': [n_hidden_2, n_classes],
}
bias_shapes = {
    'b1': [n_hidden_1],
    'b2': [n_hidden_2],
    'out': [n_classes],
}

# Every parameter starts from a draw of tf.random_normal.
weights = {key: tf.Variable(tf.random_normal(shape)) for key, shape in weight_shapes.items()}
biases = {key: tf.Variable(tf.random_normal(shape)) for key, shape in bias_shapes.items()}

# Model output (logits)
pred = network(x, weights, biases)

# Mean softmax cross-entropy over the batch, minimised with Adam
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Op that initialises all variables defined above
init = tf.global_variables_initializer()

In [9]:
# The batches are identical in every epoch (no shuffling), so split once up
# front. max(1, ...) keeps np.array_split from failing when the training set
# holds fewer samples than one batch.
total_batch = max(1, len(X_train) // batch_size)
X_batches = np.array_split(X_train, total_batch)
Y_batches = np.array_split(Y_train, total_batch)

# Evaluation ops: predicted class index, per-sample correctness, mean accuracy.
predicted_class = tf.argmax(pred, 1)
correct_prediction = tf.equal(predicted_class, tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        avg_cost = 0
        for batch_x, batch_y in zip(X_batches, Y_batches):
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Accumulate the average loss over the epoch
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model on the held-out split
    print("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
    # Predicted class per test sample; only the features are needed here.
    # `result` stays a notebook-level name because `with` opens no new scope.
    result = predicted_class.eval({x: X_test})

Epoch: 0001 cost= 3070.291736359
Epoch: 0002 cost= 790.284129041
Epoch: 0003 cost= 425.137450685
Epoch: 0004 cost= 305.148904354
Epoch: 0005 cost= 217.393925525
Epoch: 0006 cost= 155.239775475
Epoch: 0007 cost= 110.879570251
Epoch: 0008 cost= 89.110029464
Epoch: 0009 cost= 74.343169882
Epoch: 0010 cost= 60.255725252
Epoch: 0011 cost= 47.302195468
Epoch: 0012 cost= 38.764993749
Epoch: 0013 cost= 32.513992857
Epoch: 0014 cost= 27.584720378
Epoch: 0015 cost= 23.264260911
Epoch: 0016 cost= 19.683155648
Epoch: 0017 cost= 16.970819643
Epoch: 0018 cost= 14.569856547
Epoch: 0019 cost= 12.556601574
Epoch: 0020 cost= 10.906242033
Epoch: 0021 cost= 9.616040619
Epoch: 0022 cost= 8.407238945
Epoch: 0023 cost= 7.322437154
Epoch: 0024 cost= 6.593174744
Epoch: 0025 cost= 5.798421340
Epoch: 0026 cost= 5.293667549
Epoch: 0027 cost= 4.874466521
Epoch: 0028 cost= 4.512868695
Epoch: 0029 cost= 3.872979541
Epoch: 0030 cost= 3.518322268
Epoch: 0031 cost= 3.226650126
Epoch: 0032 cost= 3.097393295
Epoch: 0033 