In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import scipy.io.wavfile as wav
import numpy as np
import librosa
from python_speech_features import mfcc

# Generación de ficheros

In [2]:
import numpy as np
import os
def generate_file_data(dir,name):
    directory=dir
    a={'0':'zero','1':'one','2':'two','3':'three','4':'four','5':'five','6':'six','7':'seven','8':'eight','9':'nine'}
    da=os.listdir(directory)
    da.sort()
    file = open(dir+name+'.txt',"w")
    for filename in da:
        if '.wav' in filename:
            file.write(filename+','+a[filename[0]]+'\n')
    file.close() 
    with open(directory+'/'+name+'.txt') as f:
        read_data = f.read()
        f.closed
    read_data=read_data.split('\n')
    read_data=read_data[0:len(read_data)-1]
    return read_data

# Encoding words with One Hot Encoding

In [3]:
vocabulary_words=['zero','one','two','three','four','five','six','seven','eight','nine']
label_encoder=LabelEncoder()
integer_encoded = label_encoder.fit_transform(vocabulary_words)
print(integer_encoded)
onehot_encoder = OneHotEncoder(sparse=False,categories='auto')
integer_encoded = integer_encoded.reshape(-1, 1)
onehot_encoded=onehot_encoder.fit_transform(integer_encoded)
print(onehot_encoded)

[9 4 8 7 2 1 6 5 0 3]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]


In [4]:
def encode(x):# will transform the label using one hot encoding/take a group of strings

    integers=label_encoder.transform(x)
    integers=integers.reshape(-1,1)
    return onehot_encoder.transform(integers)

In [5]:
x=encode(['zero','one'])
print(x)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [6]:
def decode(x):
    inverted = onehot_encoder.inverse_transform(x)
    y=inverted.astype(dtype=int)# castea los valores
    y=y.reshape(1,-1)
    y=y[0]
    y=label_encoder.inverse_transform(y)
    return y

In [7]:
y=decode(x)
print(y)

['zero' 'one']


# MFCC 


In [8]:
def mfcc_features(DIR,list_dir):
    mfcc_audios=[]
    for dir in list_dir:
        wave, sr = librosa.load(DIR+dir, mono=True)
        features= librosa.feature.mfcc(wave, sr,n_mfcc=20)
        #features = mfcc(audio, samplerate=fs)
        features=np.pad(features,((0,0),(0,100-len(features[0]))),mode='constant', constant_values=0)
        #print(features)
        #print(features.shape)
        mfcc_audios.append(features)
        #print(mfcc_audios)
    #print(mfcc_audios)
    mfcc_audios=np.array(mfcc_audios)
    return mfcc_audios

In [9]:
def prepare_data(dir,name):
    file = open(dir+name)
    f=file.read()
    file.close()
    f=f.split('\n')
    f=f[0:len(f)-1]
    labels=[]
    names_audios=[]
    for i in f:
        j=i.split(',')
        names_audios.append(j[0])
        labels.append(j[1])
    labels=np.array(labels)
    onehot_encoded= encode(labels)
    #print(len(onehot_encoded))
    mfcc=mfcc_features(dir,names_audios)
    #print(onehot_encoded)
    #d=decode(onehot_encoded)
    #print(d)
    print(name+' OK')
    return mfcc,onehot_encoded



In [10]:
class dataset:
    def __init__(self,data):
        self.i=0#para el shuffle
        self.data_dir=data
        self.shuffle=None
        self.dir_training=data+'/training/'
        self.dir_test=data+'/test/'
        self.training_set=None
        self.test_set=None
    def split_dataset(self):
        generate_file_data(self.dir_training,name='training')
        generate_file_data(self.dir_test,name='test')
        self.training_set=prepare_data(self.dir_training,'training.txt')
        self.test_set=prepare_data(self.dir_test,'test.txt')
    #def next_batch(self,size):
    #
    #    return self.training_set[self.shuffle]
    

In [11]:
d=dataset('data')
d.split_dataset()

training.txt OK
test.txt OK


In [12]:
d.dir_test

'data/test/'

In [13]:
d.training_set[0][2].shape

(20, 100)

In [14]:
#[d.training_set[0][i].shape[0] for i in range(80)] will display numofcep

In [15]:
np.size(d.training_set[0])

160000

In [16]:
d.test_set

(array([[[-6.16712861e+02, -5.93040323e+02, -5.57094388e+02, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [ 9.56967723e+01,  1.36445463e+02,  1.80911571e+02, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.93036704e+01, -7.71567637e+01, -6.38301433e+01, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         ...,
         [ 8.04933219e+00, -4.26270108e+00, -9.13070011e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [ 5.56216186e+00, -3.05505231e+00, -2.73119511e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.58515724e+00, -1.73228020e+01, -2.23895974e+01, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],
 
        [[-6.47235052e+02, -6.30088770e+02, -6.15745537e+02, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [ 1.86630475e+02,  1.95857416e+02,  1.91884645e+02, ...,
           0.00000000

In [17]:
d.test_set[1]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 

# Red neuronal 

In [18]:
import tflearn
import tensorflow as tf

In [19]:
learning_rate = 0.0001
training_iters = 300000  # steps
batch_size = 64

In [20]:
width = 20  # mfcc features
height = 100  # (max) length of utterance
classes = 10  # digits

In [21]:
trainX, trainY = d.training_set[0],d.training_set[1]
testX, testY = d.test_set[0],d.test_set[1]

In [22]:
net = tflearn.input_data([None, width, height])
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate, loss='categorical_crossentropy')

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [23]:
model = tflearn.DNN(net, tensorboard_verbose=0)

In [24]:
import time

In [34]:
ti=time.time()
tflearn.init_graph(num_cores=4,gpu_memory_fraction=0.8)
for i in range(100):
    print(i)
    model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
      #_y=model.predict(X)
    model.save("tflearn.lstm.modelsingpu")
tf=time.time()


Training Step: 39389  | total loss: [1m[32m1.32707[0m[0m | time: 0.056s
| Adam | epoch: 19695 | loss: 1.32707 - acc: 0.6928 -- iter: 64/80
Training Step: 39390  | total loss: [1m[32m1.22094[0m[0m | time: 1.085s
| Adam | epoch: 19695 | loss: 1.22094 - acc: 0.7236 | val_loss: 1.35411 - val_acc: 0.6190 -- iter: 80/80
--
INFO:tensorflow:/home/visoc/Documentos/Acursos/Seminario2/Seminario-de-Tesis-2/pre-pruebas/tflearn.lstm.modelsingpu is not in all_model_checkpoint_paths. Manually adding it.
54
---------------------------------
Run id: Y9R312
Log directory: /tmp/tflearn_logs/
---------------------------------
Training samples: 80
Validation samples: 21
--


KeyboardInterrupt: 

In [25]:
ti=time.time()

tflearn.init_graph(num_cores=4,gpu_memory_fraction=0.8)
for i in range(100):
    print(i)
    with tf.device('/gpu:0'):
        model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
        #_y=model.predict(X)
        model.save("tflearn.lstm.modelsingpu")
tf=time.time()

0
---------------------------------
Run id: SMGSAC
Log directory: /tmp/tflearn_logs/
INFO:tensorflow:Summary name Accuracy/ (raw) is illegal; using Accuracy/__raw_ instead.
---------------------------------
Training samples: 80
Validation samples: 21
--


InvalidArgumentError: Cannot assign a device for operation Accuracy/__raw_: Operation was explicitly assigned to /device:GPU:0 but available devices are [ /job:localhost/replica:0/task:0/device:CPU:0, /job:localhost/replica:0/task:0/device:XLA_CPU:0 ]. Make sure the device specification refers to a valid device.
	 [[node Accuracy/__raw_ (defined at /home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/summaries.py:46)  = ScalarSummary[T=DT_FLOAT, _device="/device:GPU:0"](Accuracy/__raw_/tags, Accuracy/Mean)]]

Caused by op 'Accuracy/__raw_', defined at:
  File "/home/visoc/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/visoc/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/visoc/anaconda3/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/visoc/anaconda3/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/home/visoc/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-25-3c19fd868cdc>", line 7, in <module>
    model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/models/dnn.py", line 216, in fit
    callbacks=callbacks)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 288, in fit
    self.summ_writer, self.coord)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 794, in initialize_fit
    val_feed_dict)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/helpers/trainer.py", line 937, in create_testing_summaries
    summarize(self.metric, "scalar", sname, tr_summ_collection)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/helpers/summarizer.py", line 98, in summarize
    summaries.get_summary(type, name, value, summary_collection)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/summaries.py", line 46, in get_summary
    summ = tf.summary.scalar(tag, value)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tensorflow/python/summary/summary.py", line 90, in scalar
    val = _gen_logging_ops.scalar_summary(tags=tag, values=tensor, name=scope)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 684, in scalar_summary
    "ScalarSummary", tags=tags, values=values, name=name)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/visoc/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Cannot assign a device for operation Accuracy/__raw_: Operation was explicitly assigned to /device:GPU:0 but available devices are [ /job:localhost/replica:0/task:0/device:CPU:0, /job:localhost/replica:0/task:0/device:XLA_CPU:0 ]. Make sure the device specification refers to a valid device.
	 [[node Accuracy/__raw_ (defined at /home/visoc/anaconda3/lib/python3.6/site-packages/tflearn/summaries.py:46)  = ScalarSummary[T=DT_FLOAT, _device="/device:GPU:0"](Accuracy/__raw_/tags, Accuracy/Mean)]]


In [26]:
print("tiempo de entrenamiento")
print(tf-ti)

tiempo de entrenamiento


TypeError: unsupported operand type(s) for -: 'module' and 'float'

In [27]:
_y=model.predict([d.test_set[0][0]])

In [29]:
_y

array([[0.05129381, 0.0809454 , 0.09788167, 0.06023566, 0.02606613,
        0.07682437, 0.23757522, 0.07802615, 0.23619403, 0.05495761]],
      dtype=float32)

In [30]:
d.test_set[1][0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [31]:
y=model.predict_label([d.test_set[0][0]])

In [32]:
y

array([[6, 8, 2, 1, 7, 5, 3, 9, 0, 4]])

In [None]:
import tflearn
import speech_data
import tensorflow as tf

learning_rate = 0.0001
training_iters = 300000  # steps
batch_size = 64

width = 20  # mfcc features
height = 80  # (max) length of utterance
classes = 10  # digits

batch = word_batch = speech_data.mfcc_batch_generator(batch_size)
X, Y = next(batch)
trainX, trainY = X, Y
testX, testY = X, Y #overfit for now

# Network building
net = tflearn.input_data([None, width, height])
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate, loss='categorical_crossentropy')
# Training

### add this "fix" for tensorflow version errors
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x ) 


model = tflearn.DNN(net, tensorboard_verbose=0)
while 1: #training_iters
  model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,
          batch_size=batch_size)
  _y=model.predict(X)
model.save("tflearn.lstm.model")
print (_y)
print (y)


