In [1]:
import os
import scipy.io.wavfile as wav
import numpy as np
from python_speech_features import mfcc

In [34]:
class Audio():
    def __init__(self,dir,numcontext,numcep):
        self.dir=dir
        self.fs,self.audio=wav.read(self.dir)
        self.features=None
        self.vector=None
        self.numcep=numcep
        self.ncontext=numcontext
    def get_mfcc(self):
        self.features=mfcc(self.audio,samplerate=self.fs,numcep=self.numcep)
        self.features=self.features[::1]
    def audio_to_vector(self):
        self.vector=np.array([],np.float32)
        self.vector.resize((self.features.shape[0], self.numcep + 2 * self.numcep * self.ncontext))
        #empty_mfcc=np.zeros(self.numcep)
        empty_mfcc = np.array([])
        empty_mfcc.resize((self.numcep))
        time_slices = range(self.vector.shape[0])
        context_past_min = time_slices[0] + self.ncontext
        context_future_max = time_slices[-1] - self.ncontext
        for time in time_slices:
            need_empty_past = max(0, (context_past_min - time))
            empty_source_past = list(empty_mfcc for empty_slots in range(need_empty_past))
            data_source_past = self.features[max(0, time - self.ncontext):time]
            print(data_source_past)
            assert(len(empty_source_past) + len(data_source_past) == self.ncontext)

            # Pick up to numcontext time slices in the future, and complete with empty
            # mfcc features
            need_empty_future = max(0, (time - context_future_max))
            empty_source_future = list(empty_mfcc for empty_slots in range(need_empty_future))
            data_source_future = self.features[time + 1:time + self.ncontext + 1]
            assert(len(empty_source_future) + len(data_source_future) == self.ncontext)

            if need_empty_past:
                print(empty_source_past)
                print(len(data_source_past))
                past = np.concatenate((empty_source_past, data_source_past))
            else:
                past = data_source_past

            if need_empty_future:
                future = np.concatenate((data_source_future, empty_source_future))
            else:
                future = data_source_future

            past = np.reshape(past, self.ncontext * self.numcep)
            now = self.features[time]
            future = np.reshape(future, self.ncontext * self.numcep)

            self.vector[time] = np.concatenate((past, now, future))
            assert(len(self.vector[time]) == self.numcep + 2 * self.numcep * self.ncontext)           
        self.vector = (self.vector- np.mean(train_inputs)) / np.std(self.vector)
        return self.vector


def get_audio_and_transcript(txt_files, wav_files, n_input, n_context):
    #convierte audio a mfcc y texto a array numerioc
    audio = []
    audio_len = []
    transcript = []
    transcript_len = []
    for txt, wav in zip(txt_files, wav_files):
        # load audio and convert to features
        audio_data=Audio(dir=wav,numcontext=n_context,numcep=n_input)
        audio_data = audio_data.astype('float32')
        audio.append(audio_data)
        audio_len.append(np.int32(len(audio_data)))

        # load text transcription and convert to numerical array
        target = normalize_txt_file(txt)
        target = text_to_char_array(target)
        transcript.append(target)
        transcript_len.append(len(target))

    audio = np.asarray(audio)
    audio_len = np.asarray(audio_len)
    transcript = np.asarray(transcript)
    transcript_len = np.asarray(transcript_len)
    return audio, audio_len, transcript, transcript_len

In [35]:
x=audio('data/1.wav',9,26)

In [36]:
x.get_mfcc()

In [37]:
x.features

array([[  1.17122456e+01,  -1.50613435e+01,   1.28473271e+00, ...,
          1.67847758e-01,   4.29110407e-01,  -6.83650851e-01],
       [  1.13497046e+01,  -1.79930044e+01,  -3.04461282e+00, ...,
          1.96904338e-01,  -3.60074002e-01,  -4.70354219e-01],
       [  1.14285449e+01,  -1.70863347e+01,  -4.55747966e-01, ...,
         -4.60891208e-02,  -9.70633602e-01,  -3.39746094e-01],
       ..., 
       [  1.14691323e+01,  -1.67470461e+01,  -3.03082240e+00, ...,
         -2.07874183e-01,  -1.06944053e+00,  -3.96947241e-02],
       [  1.16843965e+01,  -1.72929583e+01,  -8.71107398e-01, ...,
          7.76714088e-03,  -1.33748338e-01,   1.75703731e-01],
       [  1.15305823e+01,  -1.82706010e+01,  -3.16093974e+00, ...,
          7.54566519e-02,   5.26135950e-01,   1.04109704e+00]])

In [38]:
x.audio_to_vector()

[]
[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  

    5.69552842e-01   2.59234178e+00]]
[[  1.16897437e+01  -1.48004949e+01   3.56688349e+00   3.62221772e+00
    1.80340148e-01   1.38392014e+00   1.54615922e+01   1.36903672e+01
    1.08759375e+01   4.24751055e+00   3.30499110e+00   8.56782943e+00
    2.84563985e+00   3.08187554e+00   3.29557196e+00   3.46013428e+00
    1.98306993e+00   4.04043020e+00   4.46146311e+00   3.10751600e-02
    8.72508609e-01   4.63331533e-01  -4.14396037e-01   2.71502736e-01
    5.37943260e-02  -5.19052779e-01]
 [  1.17317414e+01  -1.42110187e+01  -7.99264436e-02   4.41576985e+00
    7.06519176e+00   3.54046082e+00   1.05553686e+01   1.11723479e+01
    1.80973479e+01   9.61225292e+00   2.23799986e+00   6.93619976e+00
    4.29582623e+00   1.41684421e+00  -7.76977657e-01   2.16997264e+00
   -3.82434814e-01   3.79915084e+00   3.75475195e+00   3.29361791e-01
    1.64274196e+00   1.31876729e+00   3.74172951e-01  -1.39099605e-01
   -4.74395020e-01  -6.17969431e-01]
 [  1.16470208e+01  -1.68205258e+01  -2.49867546

    2.48854711e-01  -9.97394121e-01]]
[[  1.61967310e+01  -7.60095124e+00   7.90425020e-01   1.65746046e+01
   -3.41772060e+00  -2.06421898e+01  -3.57390831e+01   4.74465883e+00
   -4.65205282e+00  -5.06396583e+00   2.21890450e+00   5.85508831e+00
    1.10391056e+01  -4.83787118e+00  -2.28791132e+00  -7.57837575e+00
    3.89174084e+00  -1.20393589e+01  -6.35914214e+00   1.65558099e+00
   -4.27985408e+00  -1.25125131e+00   2.03802641e-01  -5.89551071e-02
    3.04960589e-01  -1.22013535e-02]
 [  1.72883511e+01  -5.65966212e+00  -8.36932411e+00   2.93639369e+01
   -1.08701651e+01  -2.23056729e+01  -3.71811714e+01   5.58366757e+00
   -7.22578313e-01  -6.11866340e+00   2.04450790e+00   1.51781647e+01
    3.19109961e+00  -3.75921220e+00  -6.50246531e+00  -8.67710893e+00
    6.86305763e+00  -9.95584919e+00  -2.71069257e+00   3.87938625e+00
   -2.60774593e+00  -1.26443196e+00   4.93060945e-01   4.99727604e-02
    8.59884968e-01   2.54593147e-01]
 [  1.75628414e+01  -4.92805442e+00  -1.12122453

    8.28084318e-01  -1.45051671e+00]]
[[  1.65218346e+01  -2.35314933e+00  -1.21199969e+01  -1.45119816e+01
    1.27256791e+01  -1.02184719e+01  -3.49111810e+00   2.08922604e+01
   -3.66907982e-01   1.37072155e+01   1.66827332e+00   2.30316110e+00
   -7.63530007e+00  -1.74927789e+00   1.41730033e+00   9.18559639e+00
   -9.46762140e+00   4.95871835e+00   4.10750777e+00  -3.59063887e+00
    2.52882922e+00  -1.32366487e+00  -2.33724892e-01  -2.66454635e-02
   -2.40966769e-01   1.18704456e+00]
 [  1.70001100e+01   2.61401985e+00  -1.31706297e+01  -2.04751183e+01
    6.21844721e+00  -1.00285256e+01  -8.85878843e+00   1.39544191e+01
    3.87712403e+00   5.30542699e+00   4.11874222e+00   7.98473987e+00
   -1.33067722e+01   1.04308693e+01   4.58705370e+00   1.56438346e+00
   -5.16956807e+00   5.37140467e+00   3.87436811e+00  -2.36647483e+00
    3.38026144e+00  -1.09512636e+00  -4.01088019e-02  -7.07859703e-02
   -4.14531774e-01   9.34237581e-01]
 [  1.68142767e+01   1.75142134e+01  -1.42195918

[[  1.48772696e+01  -2.68658714e+01  -3.60084972e+00   1.48248243e+01
    1.50343925e+01  -6.91971279e+00  -1.50086897e+00   2.40455649e+01
    9.90528651e+00   9.65528801e-01   6.07646681e+00   4.65627180e-03
   -7.13009471e+00   4.77064149e+00   4.53993099e+00   5.90122506e+00
   -3.65043366e+00  -6.12455631e-02   1.80901602e+00   2.44052840e+00
    1.06450312e+00  -9.11073858e-01  -3.74365382e-02  -4.81075586e-02
   -7.22891161e-01  -5.63258487e-01]
 [  1.47188874e+01  -2.43294585e+01  -4.27571790e+00   1.86699452e+01
    1.43128521e+01  -7.03454348e+00   5.86896495e-02   2.02272608e+01
    1.15903452e+00  -1.76044919e+00   9.20215657e+00  -2.65216408e+00
   -8.80280028e+00   7.10363855e+00   1.42619968e+00   2.20046561e+00
   -3.40119316e+00   4.30710952e-01   5.22086241e+00   2.91685949e+00
    7.78288575e-01  -2.27193062e-01   1.45785592e-01   2.06872456e-03
   -9.39945888e-02  -3.96756342e-01]
 [  1.44016443e+01  -1.25709298e+01   5.85937441e+00   2.83283005e+01
    1.15228125e+

   -6.28648900e-01  -6.83438833e-01]]
[[  1.15294680e+01  -1.46101472e+01  -3.71462907e+00  -3.63621645e+00
    6.50678203e+00   1.05909618e+01   1.63945830e+01   1.34066206e+01
    1.12328004e+01   1.39817783e+01   1.78283111e+00  -3.62203578e+00
   -2.60393289e-01   3.13441430e+00   2.80094882e+00   1.12928715e+01
    6.68713161e+00   3.20085414e+00   1.82643341e+00   3.64758241e-01
    1.69622887e+00   1.10580730e+00   1.13285609e-01   4.33234498e-02
   -3.65042032e-01  -1.63346117e+00]
 [  1.15346876e+01  -1.34913616e+01   1.13499400e+00  -5.76466203e-01
    6.25311624e+00   3.77091722e+00   5.97028520e+00   9.61774318e+00
    9.38987698e+00   7.46608570e+00  -6.69743377e-01  -3.08500443e+00
   -1.56328420e+00   1.12252328e+01   5.20465711e+00   7.45153548e+00
    4.52238483e+00   3.41006616e+00   4.43369751e-01  -2.92058342e-01
    7.74171517e-01  -2.47249994e-01  -1.83792840e-02  -8.48668363e-02
   -5.02473080e-03  -7.11769370e-01]
 [  1.15153944e+01  -1.32342505e+01   1.51717509

   -2.82604762e-01   4.20846145e-01]]
[[  1.49535853e+01  -2.36942064e+01   8.88402086e+00   2.37761120e+01
   -6.20765033e+00   1.56456138e+00  -4.51879067e+00  -1.08804657e+01
    3.81538196e+00   7.67700349e+00  -4.22234806e+00   1.17387679e+00
    7.79752836e+00   4.93213704e+00  -9.67682364e+00  -6.20159904e+00
   -3.24264985e-01  -4.82331809e+00  -1.61868042e+00  -2.76567299e+00
    5.02345530e-01  -1.52803663e+00  -5.22283212e-01   7.88080935e-02
    6.51399898e-01   3.94413013e-01]
 [  1.52186445e+01  -1.64973082e+01   1.02036499e+01   2.63484157e+01
   -9.66191713e+00  -1.22285578e+01  -2.03962502e+01  -5.86685090e+00
    7.12264195e+00  -1.18477754e+00  -2.64177018e+00   6.49995967e+00
    5.71113865e+00  -4.63920582e+00  -1.09979174e+01   2.30220775e+00
    1.29718364e+00  -4.51472808e+00   1.75581141e+00  -4.32879478e+00
   -2.57104908e+00  -1.64547437e+00   2.79706300e-01   7.01579495e-03
   -3.40041156e-01  -1.37320925e+00]
 [  1.72083333e+01  -1.53850805e+01  -3.73590353

    1.15384250e+00   6.51790463e-01]]
[[  1.31262083e+01  -1.32048325e+01   1.09512591e+01   4.50359611e+00
    3.87507228e+00   5.82802977e+00   2.66076131e+00   1.97504437e+01
    2.65817032e+00   5.11346523e+00  -8.49927638e-01   5.46545600e+00
   -2.10992997e+00  -4.55218684e+00  -3.96789382e+00  -2.56123951e+00
   -5.23098184e+00   1.99429784e+00  -1.11495264e+00   1.73016787e+00
    1.36132452e+00  -3.51881460e-01   3.96555732e-01   1.12383639e-01
   -6.07003871e-01  -1.67381698e+00]
 [  1.68821397e+01   1.59949007e+00  -2.25855588e+01  -1.44487146e+01
   -2.60177246e+01   7.04599343e+00  -1.55487648e+01  -1.08766877e+01
    9.22262709e+00  -1.57251711e+01  -4.68082582e+00  -6.05582203e+00
   -1.31873256e+01  -8.40578359e+00   4.36827767e+00  -4.80384647e+00
   -4.91710392e+00   3.74716283e-01  -2.08296427e+00  -1.29461553e+00
   -2.98719607e-01   1.46678286e-01  -1.47388521e-01   7.16002975e-02
   -2.20041890e-01   3.91476671e-01]
 [  1.75347198e+01   4.83983177e-01  -2.48969493

    1.41380677e-01  -1.11287090e+00]]
[[  1.38985374e+01   5.04496501e-01  -1.13319117e+01   1.03609155e+00
    7.02867244e+00  -9.23058152e+00  -1.07825510e+01  -1.51792624e+00
   -3.93126197e+00   4.15774469e+00  -5.37672870e+00  -1.91265343e+01
   -8.26225022e+00  -1.57806796e+00  -3.92753587e+00  -9.31511437e+00
    9.71709227e-01  -5.87450970e+00  -3.91692414e-01   1.27149404e+00
   -1.24773632e+00   8.24755225e-03  -1.16702236e-01   1.22972238e-01
    5.81926207e-01   3.34269661e-01]
 [  1.35000317e+01  -9.21415213e-01  -8.14579363e+00   2.23254087e+00
    9.13096470e+00  -1.12378732e+01  -1.69929529e+01   1.76075135e+00
   -3.59196739e+00  -6.41709440e+00  -6.98236046e+00  -1.92657280e+01
   -1.54718559e+01  -3.72225565e+00  -4.30921857e+00  -1.77623147e+01
   -1.98511146e+00  -5.04004125e+00  -8.19920358e-01   2.42632485e+00
   -1.86681573e+00  -4.67057688e-01   2.38543886e-02   1.08092064e-01
    6.49993269e-01   1.98987060e-02]
 [  1.32934883e+01   2.54189169e-01  -5.83600446

   -2.45050075e-01  -2.14622816e+00]]
[[  1.42525973e+01  -6.51893329e+00  -4.50561511e+00  -7.53112415e+00
   -6.42625582e-01  -1.25267009e+01  -8.17543508e+00   9.82436799e+00
   -1.40098967e+01  -5.14536909e+00   1.61711320e+01   1.00397013e+01
   -1.66002468e+01  -4.93384416e+00   8.64387181e+00  -5.30214281e-01
    2.10317070e+00  -4.52578573e+00  -7.62613152e-01  -6.76521898e-01
   -1.86723109e-01   1.45124575e+00   3.75043410e-01   3.80067244e-01
    1.02701488e+00  -2.07623759e-02]
 [  1.42543773e+01  -2.51376661e+01  -5.31374477e+00   6.52274683e+00
    1.43809457e+01  -2.42411989e+00  -6.66292556e+00   8.89359021e+00
   -1.02561842e+01  -6.38690278e+00   1.05991558e+01   1.63955245e+01
   -6.43227035e+00   1.91698992e+00   9.17904993e+00   1.46504449e+00
   -1.16979623e+00  -4.55703024e+00   1.31140938e+00   1.20888410e+00
    6.65739297e-01   2.45845453e-01   5.11753061e-01   3.79810037e-01
    1.65411396e+00   8.71840593e-01]
 [  1.46692800e+01  -3.16455160e+01  -4.54576167

    1.95278365e-01   3.23615879e-01]]
[[  1.51615361e+01  -3.05258966e+00  -7.46586615e+00   1.89851019e+01
   -1.24952737e+00  -2.14532951e+01  -3.24223063e+01   1.06716525e+00
    2.11891782e+01  -1.87247908e+01   4.60929132e+00   1.16091062e+01
   -8.78523196e+00   5.45133474e-01  -1.90670542e+00  -5.61096598e+00
   -4.01065494e+00   1.94702070e+00  -1.65794153e+00   1.16383331e-01
    3.37658130e+00  -1.84737906e-01   4.89619963e-02  -2.50599240e-02
    1.36408280e+00  -4.56025687e-01]
 [  1.50960860e+01   2.99711096e-01  -1.34299194e+01   2.99164522e+00
   -5.71971233e+00  -8.14391923e+00  -2.84761319e+01  -6.85671778e+00
    1.66788468e+01  -1.33589529e+01   1.94645468e-01   8.96992327e-01
   -1.09477128e+01  -5.28412261e-01   3.32141530e-01  -1.20791774e+01
   -6.06743099e+00   2.96569381e+00   1.46319797e+00  -4.15532215e+00
    1.49672945e+00  -1.32952776e-01  -5.28338202e-02  -2.15899915e-01
    1.14811041e+00  -4.02437199e-01]
 [  1.48123057e+01   6.36362291e-01  -1.17503925

   -6.77535357e-01   5.56918091e-01]]
[[  1.15714460e+01  -1.57879656e+01   7.32093265e-01   6.89191330e-01
    3.51001480e+00   9.69764017e+00   1.40639302e+01   4.61312010e+00
    6.35890324e+00   9.27752876e+00  -3.98888257e+00   4.90951982e+00
    6.88705692e+00   5.35660874e+00   7.44125329e+00   3.93086617e+00
   -3.73247427e+00   1.55091684e+00   3.65799727e+00   2.66584470e+00
    9.08553742e-02  -3.19098672e-01   6.67122377e-01  -1.57412601e-01
   -6.63827493e-01  -3.35734375e-01]
 [  1.15955751e+01  -1.65543202e+01  -2.41061703e-01   6.22322649e-01
    1.00615096e+00   3.03448943e+00   9.72385948e+00   4.31282311e+00
    8.94859634e+00   1.18704922e+01  -2.71937784e+00  -4.25190916e-01
    6.80749563e+00   1.28176498e+01   8.81185375e+00   1.60402796e+00
   -5.30129153e+00   3.29690541e+00   3.06451646e+00  -3.89594788e-02
   -7.10832547e-01   1.80592509e-03   2.70772250e-01   4.56698839e-02
   -5.79286034e-01  -1.48773541e+00]
 [  1.16064958e+01  -1.46543281e+01  -8.97511765

[[  1.13849974e+01  -1.68813957e+01  -6.62202196e-02   3.08886040e+00
    3.04802302e+00   6.86498253e+00   8.80097828e+00   4.65953817e+00
    8.55766140e-01   7.82849403e+00   1.28654616e+01  -2.60071870e+00
    2.96480307e+00  -8.79626570e-02   2.13219532e+00   6.48108340e+00
    3.27616649e+00   3.94978515e-01   1.41632225e+00   1.14849605e+00
   -2.43864680e-01   9.79034626e-01   1.79227027e-02  -1.18682075e-01
   -6.36632045e-01  -1.59639743e+00]
 [  1.15822053e+01  -1.74281569e+01  -1.78816236e-01  -3.91721067e-03
    2.17099564e+00   1.05803095e+01   9.93209671e+00   1.03974142e+01
    7.61003561e+00   1.17710859e+01   1.36463961e+01   8.40431807e+00
    6.50023008e+00   3.51003167e-01   3.43815661e-01   5.53487975e+00
    1.39364702e+00   1.61432844e+00  -1.42990882e-01   3.41415361e-01
    1.42794949e+00   1.53321323e+00   2.44788236e-01  -1.27747787e-01
   -8.59520665e-01  -9.42352247e-01]
 [  1.15781707e+01  -1.87040460e+01  -1.95745279e+00   1.06410700e-01
    1.46390902e+

[[  1.16108614e+01  -1.74641020e+01  -4.67844844e-01   2.99634608e+00
    1.02479367e+01   9.90217651e+00   1.02706372e+01   1.05984622e+01
    7.32714837e+00   2.70257097e+00   5.20872122e+00   7.40427013e+00
    6.43626324e+00  -1.57308673e+00  -3.01674380e+00   1.00584266e-01
   -9.77241580e-01   2.32201283e+00   3.42649405e+00   3.13786039e+00
    1.99224307e+00  -1.70644257e-01  -1.35095344e-01  -9.88836734e-02
   -7.55787251e-01  -5.53198560e-01]
 [  1.15498057e+01  -1.48994452e+01   3.16029314e-01  -1.07672541e+00
    8.18938408e+00   1.15137339e+01   1.05508550e+01   9.97958452e+00
    1.11089723e+01   1.08219652e+01  -1.13617451e-01  -1.24520309e+00
    3.19491561e+00  -1.78281166e+00   2.47402629e-01   2.83399347e+00
    5.56578336e+00   5.83056139e+00   2.38339913e+00   2.65379020e+00
    7.86034232e-01  -6.54462640e-01  -1.03562857e-01  -2.47934371e-01
   -7.66565586e-01   6.88774428e-01]
 [  1.13962976e+01  -1.64781977e+01  -2.71519881e+00  -5.08105978e+00
    6.80519606e-

[[  1.15760832e+01  -1.64462640e+01  -8.59736222e-02   1.58169847e+00
    5.53935430e+00   7.85467450e+00   1.08335423e+01   5.79205571e+00
    3.40955458e+00   5.47557447e+00   4.96244676e+00   8.45013884e+00
    1.74203734e+00   2.90477822e+00   2.66923555e+00   7.25932805e+00
    4.15940346e+00   3.88006183e+00   3.34545346e+00   4.24388241e+00
    1.22971769e-01  -2.34010838e-01  -1.85544227e-02  -7.31700678e-02
    2.44196228e-01   3.08909703e-01]
 [  1.15788878e+01  -1.52212144e+01   5.89390863e-01   2.91946363e+00
    8.15363406e+00   1.54163670e+01   1.76623396e+01   3.89846519e+00
    7.22993188e+00   4.18743618e+00   5.53875800e+00   5.97849543e+00
   -8.72272677e-01   1.80660215e+00   9.79417607e-02   5.30956816e+00
    1.95682283e+00   1.74342129e+00   2.32554671e+00   1.26926564e+00
   -1.30966340e+00  -6.25432040e-01   5.93009587e-02  -5.70495907e-02
    2.94354344e-02  -1.32882893e+00]
 [  1.15435284e+01  -1.74844305e+01  -2.43912580e+00  -1.13120085e+00
    2.81076941e+

   -8.12790180e-01  -1.66926869e+00]]
[[  1.15411201e+01  -1.57250970e+01   5.06057460e+00   8.14818766e+00
    6.60412399e+00   7.75005663e+00   1.22871580e+01   2.87306755e+00
   -1.76892880e+00  -6.95878879e-01   6.34633293e+00   1.01291552e+01
    8.75156244e+00   7.01352233e+00   7.29549112e+00   3.52395857e+00
    7.66996442e-03   1.35148415e+00  -4.39427199e+00  -2.74330990e+00
    1.56699586e+00   1.20864423e+00   3.04760143e-01   7.03225819e-02
    1.97286270e-02   2.40637576e-01]
 [  1.15009327e+01  -1.51971199e+01   1.46193203e+00   5.52802723e-01
    1.95884214e+00  -9.07202741e-01   5.36697466e+00   1.29502244e+00
    7.49715371e+00   5.59937113e+00   4.31902714e+00   7.97259565e+00
    7.37592925e+00   2.97351177e+00   4.17625247e+00   9.84857087e+00
    4.09241879e+00   3.92760389e+00   6.06204723e-02   8.88159619e-01
    2.18178604e+00   6.08618420e-01   1.27687289e-01   8.90243595e-02
   -3.03710689e-01  -4.99739289e-01]
 [  1.16017480e+01  -1.42836735e+01   3.44436879

NameError: name 'train_inputs' is not defined

In [39]:
#!/usr/bin/env python3

import os
import scipy.io.wavfile as wav

import numpy as np
from python_speech_features import mfcc
from features.utils.text import text_to_char_array, normalize_txt_file


def load_wavfile(wavfile):
    """
    Read a wav file using scipy.io.wavfile
    """
    rate, sig = wav.read(wavfile)
    data_name = os.path.splitext(os.path.basename(wavfile))[0]
    return rate, sig, data_name


def get_audio_and_transcript(txt_files, wav_files, n_input, n_context):
    '''
    Loads audio files and text transcriptions from ordered lists of filenames.
    Converts to audio to MFCC arrays and text to numerical arrays.
    Returns list of arrays. Returned audio array list can be padded with
    pad_sequences function in this same module.
    '''
    audio = []
    audio_len = []
    transcript = []
    transcript_len = []

    for txt_file, wav_file in zip(txt_files, wav_files):
        # load audio and convert to features
        audio_data = audiofile_to_input_vector(wav_file, n_input, n_context)
        audio_data = audio_data.astype('float32')

        audio.append(audio_data)
        audio_len.append(np.int32(len(audio_data)))

        # load text transcription and convert to numerical array
        target = normalize_txt_file(txt_file)
        target = text_to_char_array(target)
        transcript.append(target)
        transcript_len.append(len(target))

    audio = np.asarray(audio)
    audio_len = np.asarray(audio_len)
    transcript = np.asarray(transcript)
    transcript_len = np.asarray(transcript_len)
    return audio, audio_len, transcript, transcript_len


def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    '''
    Turn an audio file into feature representation.

    This function has been modified from Mozilla DeepSpeech:
    https://github.com/mozilla/DeepSpeech/blob/master/util/audio.py

    # This Source Code Form is subject to the terms of the Mozilla Public
    # License, v. 2.0. If a copy of the MPL was not distributed with this
    # file, You can obtain one at http://mozilla.org/MPL/2.0/.
    '''

    # Load wav files
    fs, audio = wav.read(audio_filename)

    # Get mfcc coefficients
    orig_inputs = mfcc(audio, samplerate=fs, numcep=numcep)

    # We only keep every second feature (BiRNN stride = 2)
    orig_inputs = orig_inputs[::1]

    # For each time slice of the training set, we need to copy the context this makes
    # the numcep dimensions vector into a numcep + 2*numcep*numcontext dimensions
    # because of:
    #  - numcep dimensions for the current mfcc feature set
    #  - numcontext*numcep dimensions for each of the past and future (x2) mfcc feature set
    # => so numcep + 2*numcontext*numcep
    train_inputs = np.array([], np.float32)
    train_inputs.resize((orig_inputs.shape[0], numcep + 2 * numcep * numcontext))

    # Prepare pre-fix post fix context
    empty_mfcc = np.array([])
    empty_mfcc.resize((numcep))

    # Prepare train_inputs with past and future contexts
    time_slices = range(train_inputs.shape[0])
    context_past_min = time_slices[0] + numcontext
    context_future_max = time_slices[-1] - numcontext
    for time_slice in time_slices:
        # Reminder: array[start:stop:step]
        # slices from indice |start| up to |stop| (not included), every |step|

        # Add empty context data of the correct size to the start and end
        # of the MFCC feature matrix

        # Pick up to numcontext time slices in the past, and complete with empty
        # mfcc features
        need_empty_past = max(0, (context_past_min - time_slice))
        empty_source_past = list(empty_mfcc for empty_slots in range(need_empty_past))
        data_source_past = np.array(orig_inputs[max(0, time_slice - numcontext):time_slice])
        assert(len(empty_source_past) + len(data_source_past) == numcontext)

        # Pick up to numcontext time slices in the future, and complete with empty
        # mfcc features
        need_empty_future = max(0, (time_slice - context_future_max))
        empty_source_future = list(empty_mfcc for empty_slots in range(need_empty_future))
        data_source_future = orig_inputs[time_slice + 1:time_slice + numcontext + 1]
        assert(len(empty_source_future) + len(data_source_future) == numcontext)

        if need_empty_past:
            print(empty_source_past)
            print(data_source_past)
            past = np.concatenate((empty_source_past, data_source_past))
        else:
            past = data_source_past

        if need_empty_future:
            future = np.concatenate((data_source_future, empty_source_future))
        else:
            future = data_source_future

        past = np.reshape(past, numcontext * numcep)
        now = orig_inputs[time_slice]
        future = np.reshape(future, numcontext * numcep)

        train_inputs[time_slice] = np.concatenate((past, now, future))
        assert(len(train_inputs[time_slice]) == numcep + 2 * numcep * numcontext)

    # Scale/standardize the inputs
    # This can be done more efficiently in the TensorFlow graph
    train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
    return train_inputs


def pad_sequences(sequences, maxlen=None, dtype=np.float32,
                  padding='post', truncating='post', value=0.):

    '''
    # From TensorLayer:
    # http://tensorlayer.readthedocs.io/en/latest/_modules/tensorlayer/prepro.html

    Pads each sequence to the same length of the longest sequence.

        If maxlen is provided, any sequence longer than maxlen is truncated to
        maxlen. Truncation happens off either the beginning or the end
        (default) of the sequence. Supports post-padding (default) and
        pre-padding.

        Args:
            sequences: list of lists where each element is a sequence
            maxlen: int, maximum length
            dtype: type to cast the resulting sequence.
            padding: 'pre' or 'post', pad either before or after each sequence.
            truncating: 'pre' or 'post', remove values from sequences larger
            than maxlen either in the beginning or in the end of the sequence
            value: float, value to pad the sequences to the desired value.

        Returns:
            numpy.ndarray: Padded sequences shape = (number_of_sequences, maxlen)
            numpy.ndarray: original sequence lengths
    '''
    lengths = np.asarray([len(s) for s in sequences], dtype=np.int64)

    nb_samples = len(sequences)
    if maxlen is None:
        maxlen = np.max(lengths)

    # take the sample shape from the first non empty sequence
    # checking for consistency in the main loop below.
    sample_shape = tuple()
    for s in sequences:
        if len(s) > 0:
            sample_shape = np.asarray(s).shape[1:]
            break

    x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype)
    for idx, s in enumerate(sequences):
        if len(s) == 0:
            continue  # empty list was found
        if truncating == 'pre':
            trunc = s[-maxlen:]
        elif truncating == 'post':
            trunc = s[:maxlen]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)

        # check `trunc` has expected shape
        trunc = np.asarray(trunc, dtype=dtype)
        if trunc.shape[1:] != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (trunc.shape[1:], idx, sample_shape))

        if padding == 'post':
            x[idx, :len(trunc)] = trunc
        elif padding == 'pre':
            x[idx, -len(trunc):] = trunc
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
    return x, lengths


In [40]:
b=audiofile_to_input_vector('data/1.wav',26,9)

[array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]), array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,

In [7]:
b

NameError: name 'b' is not defined