In [1]:
import tensorflow as tf

In [2]:
tf.test.is_gpu_available()

True

In [3]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import scipy.io.wavfile as wav
import numpy as np
import librosa
from python_speech_features import mfcc

# Generación de ficheros

In [4]:
import numpy as np
import os
def generate_file_data(dir,name):
    """Write a ``<name>.txt`` index of the ``.wav`` files in ``dir`` and return its records.

    Every record has the form ``<filename>,<word>`` where ``<word>`` is the
    English name of the digit that is the first character of the filename.

    Parameters
    ----------
    dir : str
        Directory to scan. The index file is created inside this directory;
        a trailing path separator is optional.
    name : str
        Base name (without the ``.txt`` extension) of the index file.

    Returns
    -------
    list of str
        The ``<filename>,<word>`` records, sorted by filename (one per line
        of the written file).

    Raises
    ------
    KeyError
        If a ``.wav`` filename does not start with a digit ``0``-``9``.
    """
    digit_words={'0':'zero','1':'one','2':'two','3':'three','4':'four','5':'five','6':'six','7':'seven','8':'eight','9':'nine'}
    records=[]
    for filename in sorted(os.listdir(dir)):
        # Match the extension itself so names such as 'x.wav.bak' are not indexed.
        if filename.endswith('.wav'):
            records.append(filename+','+digit_words[filename[0]])
    # os.path.join places the index inside `dir` whether or not `dir` ends with
    # a separator; plain concatenation wrote it next to the directory instead.
    with open(os.path.join(dir,name+'.txt'),"w") as index_file:
        index_file.writelines(record+'\n' for record in records)
    return records

# Encoding words with One Hot Encoding

In [5]:
# The ten spoken digits the classifier distinguishes.
vocabulary_words='zero one two three four five six seven eight nine'.split()

# Word -> integer id. The printed ids follow alphabetical order of the words
# ('eight' is 0 ... 'zero' is 9), not the numeric value of the digit.
label_encoder=LabelEncoder()
integer_encoded=label_encoder.fit(vocabulary_words).transform(vocabulary_words)
print(integer_encoded)

# Integer id -> one-hot row. The ids are reshaped into a single column first.
integer_encoded=integer_encoded.reshape(-1, 1)
onehot_encoder=OneHotEncoder(sparse=False,categories='auto')
onehot_encoded=onehot_encoder.fit(integer_encoded).transform(integer_encoded)
print(onehot_encoded)

[9 4 8 7 2 1 6 5 0 3]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]


In [6]:
def encode(x):
    """One-hot encode a group of vocabulary words.

    ``x`` is a sequence of strings; each is mapped to its integer id by the
    module-level ``label_encoder`` and the resulting column of ids is then
    passed to the module-level ``onehot_encoder``. Returns whatever
    ``onehot_encoder.transform`` produces for that column (one row per word).
    """
    # The ids are reshaped into a single column before one-hot encoding.
    id_column=label_encoder.transform(x).reshape(-1,1)
    return onehot_encoder.transform(id_column)

In [7]:
x=encode(['zero','one'])
print(x)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [8]:
def decode(x):
    """Turn one-hot rows back into their vocabulary words.

    Inverts ``encode``: the module-level ``onehot_encoder`` recovers the ids,
    which are cast to ``int``, flattened to one dimension and handed to the
    module-level ``label_encoder`` to obtain the words.
    """
    # Cast the recovered ids to integers before looking the words up.
    label_ids=onehot_encoder.inverse_transform(x).astype(dtype=int)
    # Flatten to a 1-D vector of ids.
    return label_encoder.inverse_transform(label_ids.reshape(-1))

In [9]:
y=decode(x)
print(y)

['zero' 'one']


# MFCC 


In [10]:
def mfcc_features(DIR,list_dir,n_mfcc=20,max_len=100):
    """Compute fixed-size MFCC matrices for a list of audio files.

    Parameters
    ----------
    DIR : str
        Directory containing the audio files.
    list_dir : sequence of str
        File names, relative to ``DIR``.
    n_mfcc : int, optional
        Number of MFCC coefficients per frame (default 20).
    max_len : int, optional
        Number of frames kept per clip (default 100). Shorter clips are
        zero-padded on the right; longer clips are truncated.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(list_dir), n_mfcc, max_len)``; an empty array
        when ``list_dir`` is empty.
    """
    mfcc_audios=[]
    for filename in list_dir:
        wave, sr = librosa.load(os.path.join(DIR,filename), mono=True)
        # Keyword arguments: recent librosa releases no longer accept the
        # signal and sample rate positionally.
        features = librosa.feature.mfcc(y=wave, sr=sr, n_mfcc=n_mfcc)
        n_frames = features.shape[1]
        if n_frames < max_len:
            features = np.pad(features,((0,0),(0,max_len-n_frames)),mode='constant', constant_values=0)
        else:
            # A negative pad width would make np.pad raise, so long clips are
            # cut to max_len frames instead.
            features = features[:, :max_len]
        mfcc_audios.append(features)
    return np.array(mfcc_audios)

In [11]:
def prepare_data(dir,name):
    """Load one data split described by an index file.

    Parameters
    ----------
    dir : str
        Directory holding both the index file and the audio files it lists.
    name : str
        File name of the index (including extension); each non-blank line is
        ``<audio filename>,<label word>``.

    Returns
    -------
    tuple
        ``(features, onehot_labels)`` where ``features`` is the result of
        ``mfcc_features`` for the listed audio files and ``onehot_labels`` is
        the result of ``encode`` for the listed label words, in file order.

    Raises
    ------
    IndexError
        If a non-blank line contains no comma.
    """
    with open(os.path.join(dir,name)) as index_file:
        # Drop blank lines instead of blindly discarding the last element, so
        # an index without a trailing newline does not lose its final record.
        records=[line for line in index_file.read().split('\n') if line.strip()]
    names_audios=[]
    labels=[]
    for record in records:
        fields=record.split(',')
        names_audios.append(fields[0])
        labels.append(fields[1])
    onehot_encoded=encode(np.array(labels))
    # Named `features` rather than `mfcc` to avoid shadowing the imported
    # python_speech_features.mfcc inside this function.
    features=mfcc_features(dir,names_audios)
    print(name+' OK')
    return features,onehot_encoded



In [12]:
class dataset:
    """Holds the training and test splits found under one data directory.

    The directory is expected to contain ``training/`` and ``test/``
    sub-folders; ``split_dataset`` fills ``training_set`` and ``test_set``
    with the values returned by ``prepare_data`` for each of them.
    """
    def __init__(self,data):
        """Record the directory layout rooted at ``data``; nothing is loaded yet."""
        self.i=0  # reserved for shuffling; not read anywhere in this class yet
        self.data_dir=data
        self.shuffle=None  # reserved for a shuffled index; not used yet
        self.dir_training=data+'/training/'
        self.dir_test=data+'/test/'
        # Both stay None until split_dataset() is called.
        self.training_set=None
        self.test_set=None
    def split_dataset(self):
        """Write both index files, then load both splits."""
        # Index files first (training, then test), loading second, in that order.
        for folder,stem in ((self.dir_training,'training'),(self.dir_test,'test')):
            generate_file_data(folder,name=stem)
        self.training_set=prepare_data(self.dir_training,'training.txt')
        self.test_set=prepare_data(self.dir_test,'test.txt')
    # TODO: batching is not implemented; the original sketch was
    #def next_batch(self,size):
    #    return self.training_set[self.shuffle]
    

In [13]:
d=dataset('data')
d.split_dataset()

training.txt OK
test.txt OK


In [14]:
d.dir_test

'data/test/'

In [15]:
d.training_set[0][2].shape

(20, 100)

In [16]:
#[d.training_set[0][i].shape[0] for i in range(80)]  # would list the number of cepstral coefficients (first axis) of each of the 80 training clips

In [17]:
np.size(d.training_set[0])

160000

In [18]:
d.test_set

(array([[[-6.16712861e+02, -5.93040323e+02, -5.57094388e+02, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [ 9.56967723e+01,  1.36445463e+02,  1.80911571e+02, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.93036704e+01, -7.71567637e+01, -6.38301433e+01, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         ...,
         [ 8.04933219e+00, -4.26270108e+00, -9.13070011e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [ 5.56216186e+00, -3.05505231e+00, -2.73119511e+00, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [-8.58515724e+00, -1.73228020e+01, -2.23895974e+01, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],
 
        [[-6.47235052e+02, -6.30088770e+02, -6.15745537e+02, ...,
           0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
         [ 1.86630475e+02,  1.95857416e+02,  1.91884645e+02, ...,
           0.00000000

In [19]:
d.test_set[1]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 

# Red neuronal 

In [20]:
import tflearn
import tensorflow as tf

In [21]:
# Training hyper-parameters shared by the network-building and training cells.
learning_rate = 0.0001  # passed to tflearn.regression (Adam optimizer)
training_iters = 300000  # steps; NOTE(review): no visible training loop reads this value
batch_size = 64  # mini-batch size passed to model.fit

In [22]:
# Input dimensions of one example; they match the (20, 100) matrices produced by mfcc_features.
width = 20  # mfcc features (n_mfcc coefficients per frame)
height = 100  # (max) length of utterance, in frames after zero-padding
classes = 10  # digits

In [23]:
trainX, trainY = d.training_set[0],d.training_set[1]
testX, testY = d.test_set[0],d.test_set[1]

In [1]:
# Build the classifier: input of shape [batch, width, height] -> LSTM with 128 units
# -> fully connected softmax over `classes` -> Adam with categorical cross-entropy.
# NOTE(review): the recorded output is a NameError because this cell was last run
# with execution count 1, i.e. before the `import tflearn` cell in that kernel session.
net = tflearn.input_data([None, width, height])
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate, loss='categorical_crossentropy')

NameError: name 'tflearn' is not defined

In [31]:
model = tflearn.DNN(net, tensorboard_verbose=0)

In [26]:
import time

In [32]:
# Train for 100 rounds of 10 epochs each, saving a checkpoint after every round,
# and record wall-clock start/end times around the whole run.
ti=time.time()
tflearn.init_graph()
for i in range(100):
    print(i)
    model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
      #_y=model.predict(X)
    model.save("tflearn.lstm.modelsingpu")
# NOTE(review): this rebinds `tf` -- the alias created by `import tensorflow as tf` --
# to a float, so any later `tf.<attr>` in the notebook fails until tensorflow is
# re-imported. Renaming it (e.g. `t_end`) requires changing the following
# `print(tf-ti)` cell at the same time.
tf=time.time()


0


IndexError: list index out of range

In [33]:
print(tf-ti)

1213.108857870102


In [31]:
# Same timed training run as the previous training cell, but init_graph is given
# explicit num_cores and gpu_memory_fraction settings.
ti=time.time()

tflearn.init_graph(num_cores=4,gpu_memory_fraction=0.8)
for i in range(100):
    print(i)
    #with tf.device('/gpu:0'):
    model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
        #_y=model.predict(X)
    model.save("tflearn.lstm.modelsingpu")
# NOTE(review): rebinding `tf` to a float shadows the tensorflow module alias; the
# commented-out `with tf.device(...)` above could not work after this cell has run.
# Renaming it requires updating the following `print(tf-ti)` cell as well.
tf=time.time()

Training Step: 3999  | total loss: [1m[32m1.04761[0m[0m | time: 0.036s
| Adam | epoch: 2000 | loss: 1.04761 - acc: 0.7877 -- iter: 64/80
Training Step: 4000  | total loss: [1m[32m1.09138[0m[0m | time: 1.065s
| Adam | epoch: 2000 | loss: 1.09138 - acc: 0.7745 | val_loss: 1.55361 - val_acc: 0.5238 -- iter: 80/80
--
INFO:tensorflow:/home/visoc/Documentos/Acursos/Seminario2/Seminario-de-Tesis-2/pre-pruebas/tflearn.lstm.modelsingpu is not in all_model_checkpoint_paths. Manually adding it.


In [32]:
print("tiempo de entrenamiento")
print(tf-ti)

tiempo de entrenamiento
1209.7606360912323


In [24]:
# Larger variant of the classifier: identical layout to the 128-unit network but
# with a 256-unit LSTM; also wraps the graph in a tflearn.DNN model.
# NOTE(review): this rebinds the global `net` and `model` used by the training cells.
net = tflearn.input_data([None, width, height])
net = tflearn.lstm(net,256, dropout=0.8)
net = tflearn.fully_connected(net, classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate, loss='categorical_crossentropy')
model= tflearn.DNN(net, tensorboard_verbose=0)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [26]:
import time 
# Timed training run: 100 rounds of 10 epochs, checkpointing after every round.
# The recorded output shows this run was stopped by a KeyboardInterrupt.
ti=time.time()
tflearn.init_graph()
for i in range(100):
    #with tf.device('/gpu:0'):
    model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
    model.save("tflearn.lstm.modelgpu")
# NOTE(review): rebinding `tf` to a float shadows the tensorflow module alias.
# Because the run was interrupted, this line did not execute for the recorded
# output; the later `print(tf-ti)` cell then uses a `tf` left over from an earlier cell.
tf=time.time()

Training Step: 1677  | total loss: [1m[32m1.27029[0m[0m | time: 0.028s
[2K| Adam | epoch: 839 | loss: 1.27029 - acc: 0.7254 -- iter: 64/80


KeyboardInterrupt: 

In [34]:
print("tiempo de entrenamiento")
print(tf-ti)

tiempo de entrenamiento
1196.7383062839508


In [27]:
_y=model.predict([d.test_set[0][0]])

In [29]:
_y

array([[0.05129381, 0.0809454 , 0.09788167, 0.06023566, 0.02606613,
        0.07682437, 0.23757522, 0.07802615, 0.23619403, 0.05495761]],
      dtype=float32)

In [30]:
d.test_set[1][0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [31]:
y=model.predict_label([d.test_set[0][0]])

In [32]:
y

array([[6, 8, 2, 1, 7, 5, 3, 9, 0, 4]])

In [None]:
import tflearn
import speech_data
import tensorflow as tf

# Stand-alone reference script (execution count is None, i.e. it was not run here).
# NOTE(review): running it rebinds learning_rate, batch_size, width, height,
# classes, trainX/trainY and testX/testY used by the other cells; in particular
# height becomes 80 while mfcc_features pads clips to 100 frames.
learning_rate = 0.0001
training_iters = 300000  # steps
batch_size = 64

width = 20  # mfcc features
height = 80  # (max) length of utterance
classes = 10  # digits

# A single batch from the generator is used for both training and validation.
batch = word_batch = speech_data.mfcc_batch_generator(batch_size)
X, Y = next(batch)
trainX, trainY = X, Y
testX, testY = X, Y #overfit for now

# Network building
net = tflearn.input_data([None, width, height])
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate, loss='categorical_crossentropy')
# Training

### add this "fix" for tensorflow version errors
col = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for x in col:
    tf.add_to_collection(tf.GraphKeys.VARIABLES, x )


model = tflearn.DNN(net, tensorboard_verbose=0)
_y = None
try:
    # The loop has no exit condition: training continues until the kernel is
    # interrupted. Catching the interrupt makes the save and prints below
    # reachable, which they were not in the original.
    while 1: #training_iters
        model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,
                  batch_size=batch_size)
        _y=model.predict(X)
except KeyboardInterrupt:
    print("training interrupted")
model.save("tflearn.lstm.model")
print (_y)
# The labels of the batch are `Y`; lowercase `y` is never defined by this script.
print (Y)




In [36]:
# Rebuild the 128-unit LSTM classifier and wrap it in a fresh tflearn.DNN model.
# NOTE(review): the recorded output is "AttributeError: 'float' object has no
# attribute 'device'"; presumably related to `tf` having been rebound to a float
# by a timing cell (`tf=time.time()`) -- confirm against the full traceback.
net = tflearn.input_data([None, width, height])
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=learning_rate, loss='categorical_crossentropy')
model = tflearn.DNN(net, tensorboard_verbose=0)

AttributeError: 'float' object has no attribute 'device'

In [None]:
# Timed training run: 100 rounds of 10 epochs, checkpointing after every round.
ti=time.time()
for i in range(100):
    print(i)
    #with tf.device('/gpu:0'):
    model.fit(trainX, trainY, n_epoch=10, validation_set=(testX, testY), show_metric=True,batch_size=batch_size)
        #_y=model.predict(X)
    model.save("tflearn.lstm.modelgpu")
# NOTE(review): rebinding `tf` to a float shadows the tensorflow module alias;
# a distinct name (e.g. `t_end`) would keep `tf.<attr>` usable in later cells,
# but any cell printing `tf-ti` has to be renamed together with it.
tf=time.time()