In [None]:
import Model
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler
#from tensorflow.keras.models import Model
from tqdm import tqdm 
import time
import os
import re
import hashlib
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  
import SpeechGenerator
import librosa
#from keras import losses

from extractMFCC import computeFeatures, computeFeatures1
from addNoise import addNoise

# Switch TensorFlow to eager mode, then report the eager flag and the
# TensorFlow / tf.keras versions (one value per line)
tf.enable_eager_execution()
print(tf.executing_eagerly(), tf.__version__, tf.keras.__version__, sep = "\n")

In [None]:
# Root folder of the dataset.
# The trailing slash matters: other cells build paths by plain string
# concatenation (e.g. dataset_dir + 'testing_list.txt').
dataset_dir = "Dataset/"

In [None]:
# Files listing the paths that belong to the test and the validation set.
# Each list is parsed as a space-separated, header-less table and only the
# first column (the path) is kept.
testWAVs, valWAVs = (
    pd.read_csv(dataset_dir + list_name, sep=" ", header = None)[0].tolist()
    for list_name in ('testing_list.txt', 'validation_list.txt')
)

In [None]:
# Dictionary containing the mapping between category name and label.
# The first list holds the words that get their own label (1..20, in order);
# every word of the second list is mapped to the shared label 0.
DictCategs = {
    word: label
    for label, word in enumerate(
        ['nine', 'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop',
         'go', 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight'],
        start = 1)
}
DictCategs.update(dict.fromkeys(
    ['backward', 'bed', 'bird', 'cat', 'dog', 'follow', 'forward', 'happy',
     'house', 'learn', 'marvin', 'sheila', 'tree', 'visual', 'wow'],
    0))

# Number of distinct labels (0..20)
nCategs = 21

In [None]:
# Collect every .wav file of the dataset as a path relative to dataset_dir.
# The walk starts from dataset_dir (instead of a second hard-coded literal)
# so that changing the root folder in one place is enough.
allWAVs  = []
for root, dirs, files in os.walk(dataset_dir):
    # Background-noise recordings are not part of any split
    if root == dataset_dir + "_background_noise_":
        continue
    for f in files:
        if f.endswith('.wav'):
            path = root + "/" + f
            # Strip the dataset root so the path matches the format of the split lists
            allWAVs.append(path[len(dataset_dir):])

# Remove from the training set the elements present in test and validation.
# The result is sorted because set iteration order changes between kernel
# restarts, which would make index-based examples non-reproducible.
trainWAVs = sorted(set(allWAVs) - set(valWAVs) - set(testWAVs))

In [None]:
# Report how many files ended up in each split
for header, wav_list in (("Train set length: ", trainWAVs),
                         ("Validation set length: ", valWAVs),
                         ("Test set length: ", testWAVs)):
    print(header + str(len(wav_list)))

In [None]:
# Function to extract the category from the path to the file
def _getFileCategory(file, catDict):
    """Return the integer label of a file from the name of its parent folder.

    Parameters:
        file: path of the form <cat>/<filename>; only the last directory
            component is used as the category name.
        catDict: mapping from category name to integer label.

    Returns:
        catDict[<cat>] when the category is known, otherwise 0.
    """
    parent_dir, _ = os.path.split(file)
    _, categ = os.path.split(parent_dir)
    if categ in catDict:
        return catDict[categ]
    return 0

In [None]:
# Compute the integer label of every file, one list per split
testWAVlabels, valWAVlabels, trainWAVlabels = (
    [_getFileCategory(wav, DictCategs) for wav in wav_list]
    for wav_list in (testWAVs, valWAVs, trainWAVs)
)

# And check the size of each label list
for header, label_list in (("Train-Labels set length: ", trainWAVlabels),
                           ("Validation-Labels set length: ", valWAVlabels),
                           ("Test-Labels set length: ", testWAVlabels)):
    print(header + str(len(label_list)))

In [None]:
# Transform the train set into numpy arrays and cache every clip as a .npy file
train = np.array(trainWAVs, dtype = object)
trainLabels = np.array(trainWAVlabels, dtype = '>i4') # '>i4' is a big-endian 32-bit integer

n_train = len(trainWAVs)
print("Loading of the train set:")
for i, wav_path in enumerate(trainWAVs):
    # Print the progress
    if (i % 5000) == 0:
        print(str(i) + '/' + str(n_train))

    # os.path.join avoids the doubled separator that plain concatenation
    # produced when dataset_dir already ends with '/'
    npy_file = os.path.join(dataset_dir, wav_path + '.npy')

    # If the file is not already present, we create the numpy version
    if not os.path.isfile(npy_file):
        y, sr = librosa.load(os.path.join(dataset_dir, wav_path), sr = 16000)
        np.save(npy_file, y)

    # We store the (relative) path of the numpy file in the vector
    train[i] = wav_path + '.npy'

# Final progress line; uses the length rather than the loop variable so an
# empty set prints 0/0 instead of failing on an undefined (or stale) index
print(str(n_train) + '/' + str(n_train))

In [None]:
# Do the same thing for the validation and the test set
def _cache_wavs_as_npy(wav_paths, set_name):
    """Cache every clip of a split as a .npy file and return the .npy paths.

    Parameters:
        wav_paths: list of .wav paths relative to dataset_dir.
        set_name: split name, only used in the progress messages.

    Returns:
        Object array with one '<wav path>.npy' entry per input path
        (relative to dataset_dir, like the input).
    """
    npy_paths = np.array(wav_paths, dtype = object)
    total = len(wav_paths)

    print("Loading of the " + set_name + " set:")
    for i, wav_path in enumerate(wav_paths):
        # Print the progress
        if (i % 5000) == 0:
            print(str(i) + '/' + str(total))

        # os.path.join avoids the doubled separator that plain concatenation
        # produced when dataset_dir already ends with '/'
        npy_file = os.path.join(dataset_dir, wav_path + '.npy')

        # If the file is not already present, we create the numpy version
        if not os.path.isfile(npy_file):
            y, sr = librosa.load(os.path.join(dataset_dir, wav_path), sr = 16000)
            np.save(npy_file, y)

        # We store the (relative) path of the numpy file in the vector
        npy_paths[i] = wav_path + '.npy'

    # Final progress line; safe for an empty split (prints 0/0)
    print(str(total) + '/' + str(total))
    return npy_paths


val = _cache_wavs_as_npy(valWAVs, "validation")
valLabels = np.array(valWAVlabels, dtype = '>i4') # '>i4' is a big-endian 32-bit integer

test = _cache_wavs_as_npy(testWAVs, "test")

In [None]:
# Check that every .npy path array has as many entries as its source list
for wav_list, npy_paths in ((trainWAVs, train), (valWAVs, val), (testWAVs, test)):
    print(str(len(wav_list)) + " = " + str(len(npy_paths)))

# Check the labels: show the first training file (cached and original path) with its label
for i in range(1):
    label_text = " - label: " + str(trainLabels[i])
    print("file: " + train[i] + label_text)
    print("file: " + trainWAVs[i] + label_text)

In [None]:
# Function to load numpy array
def load_data(file_name, data_dir):
    """Load the wav signal stored in a .npy file.

    Parameters:
        file_name: path of the .npy file, relative to data_dir.
        data_dir: folder the path is relative to; works with or without a
            trailing separator.

    Returns:
        The array stored in the file, as returned by np.load.
    """
    # os.path.join instead of string concatenation, so a data_dir without
    # trailing slash no longer produces a wrong path
    data = np.load(os.path.join(data_dir, file_name))
    return data

# Plot the waveform of one training clip
file_name = train[25]
data = load_data(file_name, dataset_dir)
fig, ax = plt.subplots()
ax.plot(data, color='b')
ax.set_title('WAV signal')

In [None]:
# Function to preprocess the data 
def load_and_preprocess_data(file_name, data_dir):
    """Load a cached clip, compute its features and scale them by their maximum.

    Parameters:
        file_name: path of the .npy file relative to data_dir (str or bytes).
        data_dir: dataset root folder (str or bytes).

    Returns:
        float32 array with the output of computeFeatures1(data, 16000)
        divided by its maximum value. When the maximum is exactly zero the
        features are returned unscaled, so no NaN/inf values are produced.
    """
    # Required by tensorflow (strings are passed as bytes). Each argument is
    # decoded on its own, so a bytes/str mix does not fail.
    if isinstance(file_name, bytes):
        file_name = file_name.decode()
    if isinstance(data_dir, bytes):
        data_dir = data_dir.decode()

    # Load data and compute the features (16000 is the sample rate passed on)
    data = load_data(file_name, data_dir)
    feats = np.asarray(computeFeatures1(data, 16000))

    # Normalize by the maximum; a zero maximum would turn every value into NaN/inf
    peak = np.max(feats)
    if peak != 0:
        feats = np.divide(feats, peak)

    return feats.astype(np.float32)
    
def load_and_preprocess_data2(feats):
    """Compute the input shifted by one step along the first axis.

    The last row is dropped and an all-zero row is inserted at the front, so
    row t of the result equals row t-1 of the input. For the 99-row input
    used in this notebook this matches the previous hard-coded
    "delete row 98" behaviour; other lengths are now shifted correctly too.

    Parameters:
        feats: array whose first axis is the one to shift.

    Returns:
        float32 array with the same shape as feats.
    """
    feats = np.asarray(feats)
    shifted = np.zeros(feats.shape, dtype = np.float32)
    # Row 0 stays zero; every other row takes the previous input row
    shifted[1:] = feats[:-1]
    return shifted

# Example: features of one training clip and their shifted version
index = 34587
feats = load_and_preprocess_data(train[index], dataset_dir)
feats2 = load_and_preprocess_data2(feats)

# Transpose so that the first axis of the features runs along the x axis
feats = feats.T
feats2 = feats2.T

# One figure for the features, one for the shifted copy
for spectrogram in (feats, feats2):
    plt.figure(figsize=(17,6))
    plt.pcolormesh(spectrogram)
    plt.title('Spectrogram visualization')
    plt.ylabel('Frequency')
    plt.xlabel('Time')

plt.show()
print("File: " + train[index] + " - Label: " + str(trainLabels[index]))

In [None]:
def create_dataset(data_dir, file_names, batch_size = 32, shuffle = True, cache_file = None):
    """Build the tf.data pipeline that feeds the model.

    Each element is a 3-tuple (features, shifted features, features); the
    first and the third entry get an extra trailing axis. The dataset repeats
    indefinitely, so the caller must bound it with a number of steps.

    NOTE(review): Keras `fit` usually reads a 3-tuple as
    (inputs, targets, sample_weights) - confirm this layout is what the
    model expects.

    Parameters:
        data_dir: dataset root folder, forwarded to load_and_preprocess_data.
        file_names: .npy paths relative to data_dir.
        batch_size: number of elements per batch.
        shuffle: when True, shuffle with a buffer as large as file_names.
        cache_file: when set, file name handed to Dataset.cache.

    Returns:
        The batched tf.data.Dataset, prefetching one batch at a time.
    """
    n_workers = os.cpu_count()

    def _load_features(file_name):
        # File name -> float32 features computed by load_and_preprocess_data
        return tf.py_func(load_and_preprocess_data, [file_name, data_dir], tf.float32)

    def _add_shifted_copy(original):
        # Features -> (features, shifted features, features)
        shifted = tf.py_func(load_and_preprocess_data2, [original], tf.float32)
        return original, shifted, original

    def _add_channel_axis(data, data2, data3):
        # Correct input shape for the network: trailing axis on the 1st and 3rd entry
        return tf.expand_dims(data, -1), data2, tf.expand_dims(data3, -1)

    # Create a Dataset object and map both preprocessing steps over it
    dataset = tf.data.Dataset.from_tensor_slices((file_names))
    dataset = dataset.map(_load_features, num_parallel_calls = n_workers)
    dataset = dataset.map(_add_shifted_copy, num_parallel_calls = n_workers)

    # Cache dataset
    if cache_file:
        dataset = dataset.cache(cache_file)

    # Shuffle
    if shuffle:
        dataset = dataset.shuffle(len(file_names))

    # Repeat the dataset indefinitely (TODO from the author: understand better how repeat works)
    dataset = dataset.repeat()

    dataset = dataset.map(_add_channel_axis)

    # Batch
    dataset = dataset.batch(batch_size = batch_size)

    # Prefetch (1 means that one batch at a time is prefetched)
    dataset = dataset.prefetch(buffer_size = 1)

    return dataset


In [None]:
batch_size = 64

# One cached pipeline per split; only the training split is shuffled
train_dataset = create_dataset(dataset_dir, train, batch_size = batch_size,
                               shuffle = True, cache_file = 'train_cache_AE')
val_dataset = create_dataset(dataset_dir, val, batch_size = batch_size,
                             shuffle = False, cache_file = 'val_cache_AE')
test_dataset = create_dataset(dataset_dir, test, batch_size = batch_size,
                              shuffle = False, cache_file = 'test_cache_AE')

# Number of batches per pass over each split: ceil(number of files / batch_size)
train_steps, val_steps, test_steps = (
    int(np.ceil(len(split) / batch_size)) for split in (train, val, test)
)

for message, steps in (("steps to completa a train epoch: ", train_steps),
                       ("steps to completa a validation spoch: ", val_steps),
                       ("steps to completa a test epoch: ", test_steps)):
    print(message + str(steps))

# Show the element types produced by the training pipeline
tf.compat.v1.data.get_output_types(train_dataset)

In [None]:
# Build the sequence-to-sequence model from the local Model module and print its summary.
# NOTE(review): 99 and 39 presumably are the input dimensions - they match the
# (1, 99, 39, 1) reshape applied before model.predict in the last cell;
# confirm against the signature of Model.Seq2SeqModel.
model = Model.Seq2SeqModel(nCategs, 99, 39, use_GRU = True)
model.summary()

In [None]:
# This function seems hard to apply to a normal fit; it can be tried, but in the
# meantime the exponential decay passed to the optimizer has been used instead
import math
def step_decay(epoch):
    """Step-wise learning-rate schedule.

    Starts at 0.001 and multiplies the rate by 0.4 once every 3 epochs
    (counting from epoch + 1), never going below 4e-5. The chosen rate is
    also printed.

    Parameters:
        epoch: zero-based epoch index.

    Returns:
        The learning rate for that epoch.
    """
    n_drops = math.floor((1+epoch)/3.0)
    lrate = max(0.001 * math.pow(0.4, n_drops), 4e-5)

    print('Changing learning rate to {}'.format(lrate))
    return lrate
# Scheduler built on step_decay (only applied if it is added to the callbacks of fit)
lrate = LearningRateScheduler(step_decay)

# The model is compiled with loss='mse' and metrics=['accuracy'], so a
# 'val_sparse_categorical_accuracy' entry is never logged: monitoring it means
# early stopping never triggers and no checkpoint is ever written.
# 'val_loss' is always available when validation data is passed to fit.
monitored_metric = 'val_loss'

# The checkpoint is written inside 'Model/': make sure the folder exists
# before the first epoch ends
os.makedirs('Model', exist_ok = True)

earlystopper = EarlyStopping(monitor = monitored_metric, patience = 3, verbose = 1)
checkpointer = ModelCheckpoint('Model/AttentionModel-checkpoint-1.h5', 
                               monitor = monitored_metric, 
                               verbose = 1, save_best_only = True, save_weights_only = True)

In [None]:
# Optimizer set-up: Adam driven by a staircase exponential decay of the learning rate
# (base rate 0.01, multiplied by 0.4 every train_steps * 2 steps of global_step).
# NOTE(review): global_step starts at train_steps * 2 and nothing in this cell
# increments it or hands it to the optimizer, so the decayed rate may stay at a
# single value for the whole training - confirm against the TensorFlow version in use.
learning_rate = 0.01
global_step = tf.Variable(train_steps * 2, trainable = False)

decayed_lr = tf.train.exponential_decay(learning_rate,
                                        global_step, train_steps * 2,
                                        0.4, staircase = True)

adam = tf.train.AdamOptimizer(decayed_lr, 
                              beta1 = 0.9,
                              beta2 = 0.999,
                              epsilon = 1e-07,
                              use_locking = False,
                              name = 'Adam')

# Mean squared error as loss, 'accuracy' as additional metric
model.compile(optimizer = adam,
              loss='mse', 
              metrics=['accuracy'])

# Train for at most num_epochs epochs. The datasets repeat indefinitely, so the
# length of an epoch is fixed through steps_per_epoch / validation_steps.
num_epochs = 6
history = model.fit(train_dataset, 
                    epochs = num_epochs, 
                    steps_per_epoch = train_steps,
                    validation_data = val_dataset, 
                    validation_steps = val_steps,
                    callbacks = [checkpointer, earlystopper])

In [None]:
# Plot loss
plt.figure()
plt.plot(history.history['loss'], label = 'Train loss')
plt.plot(history.history['val_loss'], label = 'Val loss')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Plot accuracy.
# The key under which Keras logs the accuracy depends on how the metric was
# given to compile (and on the Keras version), so it is looked up instead of
# hard-coded: indexing a missing key would raise a KeyError and stop the cell
# before the model is saved and evaluated.
acc_key = next((key for key in ('sparse_categorical_accuracy', 'accuracy', 'acc')
                if key in history.history), None)
if acc_key is not None:
    plt.figure()
    plt.plot(history.history[acc_key], label = 'Train acc')
    if ('val_' + acc_key) in history.history:
        plt.plot(history.history['val_' + acc_key], label = 'Val acc')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')

# Save the model
model.save('Model/AttentionModel-1_00-0107-17.h5')

# Evaluate on the test split (the dataset repeats, so the number of steps bounds it)
testEval = model.evaluate(test_dataset,
                          steps = test_steps,
                          verbose = 1)

In [None]:
# Run the model on one cached clip: compute its features, add a leading batch
# axis and a trailing axis, then print the flat index of the largest output value
sample_file = "left/94de6a6a_nohash_2.wav.npy"
x = load_and_preprocess_data(sample_file, dataset_dir).reshape((1, 99, 39, 1))
print(x.shape)
res = model.predict(x)
print(res.argmax())