In [0]:
!wget -q https://www.dropbox.com/s/yigijs122togfk4/embeddings.dat?dl=1  -O embeddings.dat
!wget -q https://www.dropbox.com/s/qdhtexle4p0ngc3/DatasetSplitCSV.zip?dl=1 -O DatasetSplitCSV.zip
!unzip -q DatasetSplitCSV.zip

In [19]:
!pip install speechpy

Collecting speechpy
  Downloading https://files.pythonhosted.org/packages/8f/12/dbda397a998063d9541d9e149c4f523ed138a48824d20598e37632ba33b1/speechpy-2.4-py2.py3-none-any.whl
Installing collected packages: speechpy
Successfully installed speechpy-2.4


In [0]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import scipy.io.wavfile as wav
from speechpy.feature import mfcc
import librosa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow import keras
from keras.utils import np_utils
from sklearn.utils.multiclass import unique_labels

In [0]:
params={}
params["embeddingType"]="raw" # post
params["mapReduceFunc"]="Pad" # Avg

In [0]:
def mapReduce(embed,funcName):
    if funcName=="Avg":
        embed=np.average(embed,axis=0)
    if funcName=="Pad":
        embed=np.pad(embed, [( 0,6-embed.shape[0]), (0, 0)], mode='constant', constant_values=0)
        embed=embed.reshape(-1)
    return embed

In [0]:
train=pd.read_csv("train.csv",header=None)
valid=pd.read_csv("valid.csv",header=None)
test=pd.read_csv("test.csv",header=None)
embeddings=np.load("embeddings.dat",allow_pickle=True)

In [7]:
embeddings["post"]['03-02-04-01-02-02-02.wav'].dtype

dtype('uint8')

In [8]:
list(embeddings['raw'].keys())[2000]

'03-02-04-01-02-02-02.wav'

In [0]:
x_train=[]
x_val=[]
x_test=[]
for x in train[0]:
    embed=embeddings[params["embeddingType"]][x]
    x_train.append(mapReduce(embed,params["mapReduceFunc"]))

for x in valid[0]:
    embed=embeddings[params["embeddingType"]][x]
    x_val.append(mapReduce(embed,params["mapReduceFunc"]))

for x in test[0]:
    embed=embeddings[params["embeddingType"]][x]
    x_test.append(mapReduce(embed,params["mapReduceFunc"]))
x_train=np.array(x_train)
x_val=np.array(x_val)
x_test=np.array(x_test)

y_train=np.array(train[1].astype('category').cat.codes)
y_val=np.array(valid[1].astype('category').cat.codes)
y_test=np.array(test[1].astype('category').cat.codes)

In [57]:
x_train[0].shape

(768,)

In [0]:
inputs = keras.Input(shape=(x_train.shape[-1],), name="log-mel")
x = tf.keras.layers.Reshape((6, 128), input_shape=(x_train.shape[-1],))(inputs)
x = layers.LSTM(128,input_shape=((6,128)))(x)
# x = layers.Dense(64, activation="tanh", name="dense_1")(inputs)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(32, activation="relu", name="dense_2")(x)
x = layers.Dense(16, activation="tanh", name="dense_3")(x)
x = layers.Dense(8, activation="softmax", name="dense_4")(x)

In [0]:
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[keras.metrics.sparse_categorical_accuracy])

In [71]:
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=34,
                    validation_data=(x_val, y_val))

Epoch 1/34
Epoch 2/34
Epoch 3/34
Epoch 4/34
Epoch 5/34
Epoch 6/34
Epoch 7/34
Epoch 8/34
Epoch 9/34
Epoch 10/34
Epoch 11/34
Epoch 12/34
Epoch 13/34
Epoch 14/34
Epoch 15/34
Epoch 16/34
Epoch 17/34
Epoch 18/34
Epoch 19/34
Epoch 20/34
Epoch 21/34
Epoch 22/34
Epoch 23/34
Epoch 24/34
Epoch 25/34
Epoch 26/34
Epoch 27/34
Epoch 28/34
Epoch 29/34
Epoch 30/34
Epoch 31/34
Epoch 32/34
Epoch 33/34
Epoch 34/34


In [0]:
def draw_hist(hist):
    acc = hist.history['accuracy']
    val_acc = hist.history['val_accuracy']
    loss = hist.history['loss']
    val_loss = hist.history['val_loss']

    epochs = range(1, len(acc) + 1)

    # "bo" is for "blue dot"
    plt.figure(figsize=(8, 4.5))
    plt.plot(epochs, loss, 'bo', label='Training loss')
    # b is for "solid blue line"
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

    plt.clf()   # clear figure
    acc_values = acc
    val_acc_values = val_acc

    plt.figure(figsize=(8, 4.5))
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [0]:
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data

    print('unique labels:', unique_labels(y_true, y_pred))
    classes = np.array(classes)[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')


    fig, ax = plt.subplots()
    fig.set_figheight(9)
    fig.set_figwidth(9)
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

In [75]:
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=128, verbose=0)
print(test_loss, test_acc)

1.7774624824523926 0.4887983798980713
