In [1]:
import Model
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
#from tensorflow.keras.models import Model
from tqdm import tqdm 
import time
import os
import re
import hashlib
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  
import SpeechGenerator
import librosa
#from keras import losses

from extractMFCC import computeFeatures, computeFeatures1
from addNoise import addNoise

#tf.enable_eager_execution()
#print(tf.executing_eagerly())
print(tf.__version__)
print(tf.keras.__version__)

ImportError: Something is wrong with the numpy installation. While importing we detected an older version of numpy in ['/home/jupyter/.local/lib/python3.5/site-packages/numpy']. One method of fixing this is to repeatedly uninstall numpy until none is found, then reinstall this version.

In [None]:
# Root folder of the dataset.
# Keep the trailing slash: other cells build paths by plain string
# concatenation (e.g. dataset_dir + 'testing_list.txt') and strip
# len(dataset_dir) characters to obtain relative paths.
dataset_dir = "Dataset/"

In [None]:
# Split lists shipped with the dataset: the first space-separated column of
# each file holds the relative paths of the clips in the test / validation set
testWAVs, valWAVs = (
    pd.read_csv(dataset_dir + split_list, sep=" ", header=None)[0].tolist()
    for split_list in ('testing_list.txt', 'validation_list.txt')
)

In [None]:
# Dictionary containing the mapping between category name and label.
# First the words that all share label 0 ...
DictCategs = dict.fromkeys(
    ['backward', 'bed', 'bird', 'cat', 'dog', 'follow', 'forward', 'happy',
     'house', 'learn', 'marvin', 'sheila', 'tree', 'visual', 'wow'],
    0)
# ... then the words with their own label, numbered 1..20 in this order
DictCategs.update(
    (word, label)
    for label, word in enumerate(
        ['nine', 'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop',
         'go', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight'],
        start=1))
# Number of distinct labels: 0 plus 1..20
nCategs = 21

In [None]:
# Load all the files in the dataset: collect every .wav clip under the dataset
# root (skipping the background-noise folder) as a path relative to that root
allWAVs  = []
# Walk the configured root rather than a second hard-coded 'Dataset/' literal,
# so the prefix stripped below always matches the folder that was walked
for root, dirs, files in os.walk(dataset_dir):
    if root == dataset_dir + "_background_noise_":
        continue
    for f in files:
        if f.endswith('.wav'):
            path = root + "/" + f
            # Drop the leading dataset_dir to get the <category>/<file> form
            allWAVs.append(path[len(dataset_dir):])

# Remove from the training set the elements present in test and validation.
# Sorting makes the order reproducible across kernel restarts (the iteration
# order of a set of strings changes from run to run), which matters because
# later cells pick clips by position.
trainWAVs = sorted(set(allWAVs) - set(valWAVs) - set(testWAVs))

In [None]:
# Size of sets: one line per split
for split_name, split_files in (("Train", trainWAVs), ("Validation", valWAVs), ("Test", testWAVs)):
    print("{} set length: {}".format(split_name, len(split_files)))

In [None]:
# Function to extract the category from the path to the file
def _getFileCategory(file, catDict):
    # Receives a file with name <cat>/<filename> and returns an integer that is catDict[cat]
    categ = os.path.basename(os.path.dirname(file))
    return catDict.get(categ, 0)

In [None]:
# Get categories of each set: one integer label per file, in file order
testWAVlabels, valWAVlabels, trainWAVlabels = (
    [_getFileCategory(wav_file, DictCategs) for wav_file in wav_files]
    for wav_files in (testWAVs, valWAVs, trainWAVs)
)

# And test the size of the labels set
for split_name, split_labels in (("Train", trainWAVlabels), ("Validation", valWAVlabels), ("Test", testWAVlabels)):
    print("{}-Labels set length: {}".format(split_name, len(split_labels)))

In [None]:
# Transform the train dataset into numpy arrays and cache every clip as .npy
train = np.array(trainWAVs, dtype = object)
# Native int32. The previous '>i4' is *big-endian* int32, not the plain int32
# its comment announced.
# NOTE(review): confirm nothing downstream relied on the big-endian layout.
trainLabels = np.array(trainWAVlabels, dtype = np.int32)

print("Loading of the train set:")
for i, wav_file in enumerate(trainWAVs):
    # Print the progress
    if (i % 5000) == 0:
        print(str(i) + '/' + str(len(trainWAVs)))

    wav_path = dataset_dir + "/" + wav_file

    # If the file is not already present, we create the numpy version
    # (the signal as returned by librosa.load with sr = 16000)
    if not os.path.isfile(wav_path + '.npy'):
        y, sr = librosa.load(wav_path, sr = 16000)
        np.save(wav_path + '.npy', y)

    # We store the (relative) path to the numpy file in the vector
    train[i] = wav_file + '.npy'

# Final progress line. Uses the list length instead of the leaked loop
# variable, which is undefined (or stale from another cell) for an empty list.
print(str(len(trainWAVs)) + '/' + str(len(trainWAVs)))

In [None]:
# Do the same thing for the validation and the test set
def _cache_wavs_as_npy(wav_files, set_name, data_dir):
    """Make sure every WAV clip has a .npy copy and return the .npy paths.

    wav_files -- list of WAV paths relative to data_dir
    set_name  -- word used in the progress messages ("validation", "test")
    data_dir  -- root folder of the dataset

    Returns an object array holding wav_files[k] + '.npy' at position k.
    Clips whose .npy file already exists are not decoded again.
    """
    npy_files = np.array(wav_files, dtype = object)
    total = len(wav_files)

    print("Loading of the " + set_name + " set:")
    for i, wav_file in enumerate(wav_files):
        # Print the progress
        if (i % 5000) == 0:
            print(str(i) + '/' + str(total))

        wav_path = data_dir + "/" + wav_file

        # If the file is not already present, we create the numpy version
        if not os.path.isfile(wav_path + '.npy'):
            signal, _ = librosa.load(wav_path, sr = 16000)
            np.save(wav_path + '.npy', signal)

        # We store the (relative) path to the numpy file in the vector
        npy_files[i] = wav_file + '.npy'

    # Final progress line; based on the length so it is also valid for an
    # empty list (the loop variable would be undefined there)
    print(str(total) + '/' + str(total))
    return npy_files


val = _cache_wavs_as_npy(valWAVs, "validation", dataset_dir)
# Native int32. The previous '>i4' is *big-endian* int32, not the plain int32
# its comment announced.
# NOTE(review): confirm nothing downstream relied on the big-endian layout.
valLabels = np.array(valWAVlabels, dtype = np.int32)

test = _cache_wavs_as_npy(testWAVs, "test", dataset_dir)

In [None]:
# Test on the length: the two numbers on each line should match
for wav_list, npy_paths in ((trainWAVs, train), (valWAVs, val), (testWAVs, test)):
    print("{} = {}".format(len(wav_list), len(npy_paths)))

# Test on labels: first train clip, shown with its cached path and its
# original path next to the same label
for i in range(1):
    for shown_path in (train[i], trainWAVs[i]):
        print("file: " + shown_path + " - label: " + str(trainLabels[i]))

In [None]:
# Function to load numpy array
def load_data(file_name, data_dir):
    """Read one cached signal from disk.

    The path is the plain concatenation `data_dir + file_name` (no separator
    is inserted); the array stored there is returned as loaded by np.load.
    """
    npy_path = data_dir + file_name
    return np.load(npy_path)

# Plot a wav: waveform of one cached training clip
file_name = train[25]
data = load_data(file_name, dataset_dir)
fig, ax = plt.subplots()
ax.plot(data, color='b')
ax.set_title('WAV signal')

In [None]:
# Function to preprocess the data
def _to_text(value):
    """Return `value` as a Python str, unwrapping tensors and byte strings."""
    # tf.py_function hands its arguments over as eager tensors: unwrap them
    if hasattr(value, 'numpy'):
        value = value.numpy()
    # Strings coming from TensorFlow (or numpy) arrive as bytes
    if isinstance(value, bytes):
        value = value.decode()
    return value


def load_and_preprocess_data(file_name, data_dir):
    """Load one cached clip and turn it into a float32 feature array.

    file_name -- path of the .npy file relative to data_dir
    data_dir  -- root folder of the dataset
    Both may be given as str, bytes or string tensors. Each one is converted
    on its own, so a str `data_dir` paired with a bytes `file_name` no longer
    hits `.decode()` on a str.

    Returns computeFeatures1(signal, 16000) cast to np.float32.
    """
    file_name = _to_text(file_name)
    data_dir = _to_text(data_dir)

    # Load data
    data = load_data(file_name, data_dir)
    feats = computeFeatures1(data, 16000)

    # Per-feature mean/std normalisation was tried here and is left disabled

    return feats.astype(np.float32)

# example: compute and display the features of one training clip
index = 26257
# Transposed before plotting; presumably this puts time on the x axis to
# match the axis labels below -- TODO confirm against computeFeatures1
feats = load_and_preprocess_data(train[index], dataset_dir).T

plt.figure(figsize=(17,6))
plt.pcolormesh(feats)
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.title('Spectrogram visualization')
plt.show()

print("File: " + train[index] + " - Label: " + str(trainLabels[index]))

In [None]:
def create_dataset(data_dir, file_names, labels, batch_size = 32, shuffle = True, cache_file = None):
    """Build the tf.data input pipeline for a list of cached clips.

    data_dir   -- root folder, forwarded to load_and_preprocess_data
    file_names -- relative .npy paths, one per example
    labels     -- labels aligned with file_names
    batch_size -- number of examples per batch
    shuffle    -- when true, shuffle with a buffer of len(file_names) elements
    cache_file -- when given, the preprocessed features are cached under this
                  file name (before shuffling)

    Returns a dataset that repeats indefinitely and yields (features, label)
    batches, with a trailing axis of size 1 appended to each example's features.
    """
    def _load_features(file_name, label):
        # Run the Python-side loader; it is declared to return tf.float32
        feats = tf.py_function(load_and_preprocess_data, [file_name, data_dir], tf.float32)
        return feats, label

    def _add_channel_axis(data, label):
        # Correct input shape for the network
        return tf.expand_dims(data, -1), label

    dataset = tf.data.Dataset.from_tensor_slices((file_names, labels))
    dataset = dataset.map(_load_features, num_parallel_calls = os.cpu_count())

    if cache_file:
        dataset = dataset.cache(cache_file)

    if shuffle:
        dataset = dataset.shuffle(len(file_names))

    # Repeat forever, fix the shape, batch, and keep one batch prefetched
    return (dataset.repeat()
                   .map(_add_channel_axis)
                   .batch(batch_size = batch_size)
                   .prefetch(buffer_size = 1))


In [None]:
batch_size = 32

# Training pipeline is shuffled, validation pipeline keeps the file order;
# each one caches its preprocessed features in its own file
train_dataset = create_dataset(dataset_dir, train, trainLabels,
                               batch_size = batch_size,
                               shuffle = True,
                               cache_file = 'train_cache')
val_dataset = create_dataset(dataset_dir, val, valLabels,
                             batch_size = batch_size,
                             shuffle = False,
                             cache_file = 'val_cache')

# Batches needed to see every example once (integer ceiling division)
train_steps = -(-len(train) // batch_size)
val_steps = -(-len(val) // batch_size)
print(train_steps, val_steps, sep = '\n')

In [None]:
# Initialize a dataset for test: the first 1000 training clips, used only to
# time the input pipeline
small_dataset = create_dataset(dataset_dir, train[:1000], trainLabels[:1000],
                               batch_size = batch_size, shuffle = True, cache_file = 'small_cache')

# Define an iterator to get data
iterator = small_dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# Start a new session
with tf.Session() as sess:
    # Three timed rounds; each round pulls 3 batches from the iterator
    for num_epoch in range(3):
        round_start = time.time()
        for step in tqdm(range(3)):
            # Get the next batch of data
            data, label = sess.run(next_element)
        # Print loading time
        elapsed = time.time() - round_start
        print('EPOCH {} - Time to load the entire dataset [seconds]: {}'.format(num_epoch+1, elapsed))

# Remove the created cache
for cache_part in ('small_cache.data-00000-of-00001', 'small_cache.index'):
    os.remove(cache_part)

In [None]:
# Disabled alternative input pipeline based on SpeechGenerator.SpeechGen.
# NOTE(review): later cells still reference trainGen / valGen, which are only
# defined by these two commented-out lines -- re-enable them or update those cells.
#trainGen = SpeechGenerator.SpeechGen(train, trainLabels, dataset_dir, batch_size = 64, shuffle = True)
#valGen = SpeechGenerator.SpeechGen(val, valLabels, dataset_dir, batch_size = 16, shuffle = False)    

In [None]:
# Build the network.
# NOTE(review): 99 and 39 are presumably the dimensions of the feature matrix
# fed to the model -- confirm against Model.SimpleModel.
model = Model.SimpleModel(nCategs, 99, 39,
                          use_GRU = True,
                          dropout = 0.1,
                          activation = 'relu')

# Adam with a fixed learning rate.
# (A variant driven by tf.train.exponential_decay(0.0000001, 0.8, 50, 0.95,
#  staircase=True) was tried earlier and is not used.)
adam = tf.train.AdamOptimizer(
    learning_rate = 0.001,
    beta1 = 0.9,
    beta2 = 0.999,
    epsilon = 1e-07,
    use_locking = False,
    name = 'Adam',
)

# Labels are integer class ids, hence the sparse loss and the sparse accuracy
model.compile(
    optimizer = adam,
    loss = tf.keras.losses.sparse_categorical_crossentropy,
    metrics = ['sparse_categorical_accuracy'],
)

model.summary()

In [None]:
# Train on the tf.data pipelines; the step counts tell Keras where an epoch
# ends because both datasets repeat indefinitely
num_epochs = 10
history = model.fit(train_dataset,
                    epochs = num_epochs,
                    steps_per_epoch = train_steps,
                    validation_data = val_dataset,
                    validation_steps = val_steps)

# Save the model
model.save('Model/my_model_using-tf.h5')

print(history.history.keys())

# Plot loss
plt.figure()
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Val loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Plot accuracy.
# The model is compiled with the 'sparse_categorical_accuracy' metric, so the
# history is keyed by that name; 'categorical_accuracy' raised a KeyError.
acc_key = 'sparse_categorical_accuracy'
plt.figure()
plt.plot(history.history[acc_key], label='Train acc')
plt.plot(history.history['val_' + acc_key], label='Val acc')
plt.legend()

plt.xlabel('Epoch')
plt.ylabel('Accuracy')

In [None]:
# Disabled callbacks: everything below sits inside a string literal, so none of
# lrate / earlystopper / checkpointer is actually defined when this cell runs.
# NOTE(review): the monitors use 'val_categorical_accuracy' while the model is
# compiled with 'sparse_categorical_accuracy' -- adjust before re-enabling.
"""
import math
def step_decay(epoch):
    initial_lrate = 0.001
    drop = 0.4
    epochs_drop = 3.0
    lrate = initial_lrate * math.pow(drop,  
            math.floor((1+epoch)/epochs_drop))
    
    if (lrate < 4e-5):
        lrate = 4e-5
      
    print('Changing learning rate to {}'.format(lrate))
    return lrate
lrate = LearningRateScheduler(step_decay)

earlystopper = EarlyStopping(monitor='val_categorical_accuracy', patience=10, verbose=1)
checkpointer = ModelCheckpoint('Model/my_model_loss_dropout-0_05-yes_reg-0.h5', 
                               monitor='val_categorical_accuracy', 
                               verbose=1, save_best_only = True, save_weights_only = True)
"""

In [None]:
# Generator-based training run.
# NOTE(review): this cell cannot run on a fresh kernel as the notebook stands:
#   - trainGen is only created in a commented-out line;
#   - earlystopper, checkpointer and lrate are only assigned inside a string literal.
num_epochs = 1
history = model.fit_generator(trainGen, epochs = num_epochs, callbacks = [earlystopper, checkpointer, lrate])

print(history.history.keys())
# Plot loss
# NOTE(review): no validation data is passed to fit_generator above, so the
# 'val_*' entries are presumably missing from the history -- confirm.
plt.figure()
plt.plot(history.history['loss'], label='Train loss')
plt.plot(history.history['val_loss'], label='Val loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Plot accuracy
# NOTE(review): the model is compiled with 'sparse_categorical_accuracy', so
# these 'categorical_accuracy' keys likely do not exist -- confirm.
plt.figure()
plt.plot(history.history['categorical_accuracy'], label='Train acc')
plt.plot(history.history['val_categorical_accuracy'], label='Val acc')
plt.legend()

plt.xlabel('Epoch')
plt.ylabel('Accuracy')

In [None]:
# Print the largest and smallest value of every weight array of the model.
# The list returned by get_weights() is iterated directly: its arrays have
# different shapes, so packing them into a single np.array is unnecessary and
# fails on NumPy versions that reject ragged input.
booo = model.get_weights()
for layer_weights in booo:
    print(str(np.max(layer_weights)) + " - min: " + str(np.min(layer_weights)))

In [None]:
#Debugging
# Keras is reached through `tf.keras`: the notebook never binds a bare `keras`
# name (`import tensorflow.keras` only binds `tensorflow`).
# NOTE(review): requires the model to contain layers named 'output',
# 'attSoftmax' and 'mel_stft' -- confirm against Model.SimpleModel.
attSpeechModel = tf.keras.Model(inputs=model.input,
                                outputs=[model.get_layer('output').output,
                                         model.get_layer('attSoftmax').output,
                                         model.get_layer('mel_stft').output])

# Gradient of output wrt the input of the model (Tensor)
gradients = tf.keras.backend.gradients(model.output, model.input)
print(gradients)

In [None]:
# Fetch batch number 1851 from the validation generator.
# NOTE(review): valGen is only created in a commented-out line of this
# notebook, so it must be re-enabled before this cell can run.
audios, classes = valGen[1851]
print(classes)

In [None]:
# Position, inside the fetched batch, of the clip to inspect; show its class
idAudio = 0
print(classes[idAudio])

In [None]:
# Run the debugging model on the batch; the three results follow the order of
# the outputs it was built with (layers 'output', 'attSoftmax', 'mel_stft')
outs, attW, specs = attSpeechModel.predict( audios )


In [None]:
# Look for NaN values in `specs`
boh = np.isnan(specs)
# Show where the NaNs are: the index arrays used to be computed and thrown
# away (np.where was neither printed nor the last expression of the cell)
print(np.where(boh))
print(specs)

In [None]:
# Save the model (weights only)
model.save_weights('Model/my_model_sparse-loss_dropout-0_1-yes_reg-0.h5')
# Load model (weights only): weights are restored into an already built model
#model.load_weights('Model/my_model_sparse-loss_dropout-0_1-yes_reg-0.h5')

In [None]:
# Evaluate the model on the test split, read in file order through a generator
testGen = SpeechGenerator.SpeechGen(test, testWAVlabels, dataset_dir, shuffle=False)
testEval = model.evaluate_generator(testGen, verbose = 1)

# First entry is the loss, second the metric the model was compiled with
for metric_name, metric_value in zip(("loss", "acc"), testEval):
    print(metric_name + ": " + str(metric_value))

In [None]:
# Predict the class of a single cached clip.
# The reshape adds a leading axis of size 1, presumably because predict
# expects a batch of inputs; it only succeeds when the clip holds exactly
# 16000 samples.
# NOTE(review): this feeds the raw waveform to the model -- confirm that the
# current model takes raw audio rather than precomputed features.
x = np.load("Dataset/tree/022cd682_nohash_1.wav.npy").reshape((1, 16000))
print(x.shape)
res = model.predict(x) 
print(res.argmax())