In [1]:
import tensorflow as tf
from __future__ import print_function
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn
import os,time
import numpy as np
import json
from functools import wraps

In [3]:
tf.reset_default_graph()

# Create input data
X = np.random.randn(2, 10, 8)

# The second example is of length 6 
X[1,6:] = 0
X_lengths = [10, 6]

cell = tf.contrib.rnn.LSTMCell(num_units=64, state_is_tuple=True)

outputs, states  = bidirectional_dynamic_rnn(
    cell_fw=cell,
    cell_bw=cell,
    dtype=tf.float64,
    sequence_length=X_lengths,
    inputs=X)

output_fw, output_bw = outputs
states_fw, states_bw = states

result = tf.contrib.learn.run_n(
    {"output_fw": output_fw, "output_bw": output_bw, "states_fw": states_fw, "states_bw": states_bw},
    n=1,
    feed_dict=None)

print(result[0]["output_fw"].shape)
print(result[0]["output_bw"].shape)
print(result[0]["states_fw"].h.shape)
print(result[0]["states_bw"].h.shape)

ValueError: Attempt to reuse RNNCell <tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl.LSTMCell object at 0x7f1ecfc4f950> with a different variable scope than its first use.  First use of cell was with scope 'bidirectional_rnn/fw/lstm_cell', this attempt is with scope 'bidirectional_rnn/bw/lstm_cell'.  Please create a new instance of the cell if you would like it to use a different set of weights.  If before you were using: MultiRNNCell([LSTMCell(...)] * num_layers), change to: MultiRNNCell([LSTMCell(...) for _ in range(num_layers)]).  If before you were using the same cell instance as both the forward and reverse cell of a bidirectional RNN, simply create two instances (one for forward, one for reverse).  In May 2017, we will start transitioning this cell's behavior to use existing stored weights, if any, when it is called with scope=None (which can lead to silent model degradation, so this error will remain until then.)

In [2]:
def describe(func):
    ''' wrap function,to add some descriptions for function and its running time
    '''
    @wraps(func)
    def wrapper(*args, **kwargs):
        print(func.__name__+'...')
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        print(str(func.__name__+' in '+ str(end-start)+' s'))
        return result
    return wrapper

# load 字典表 

In [3]:
def loadatcvocab(vocabfile='atcvocab.txt'):
    char_map = {'<UNK>':0}
    index_map = {0:'<UNK>'}
    with open(vocabfile, 'r') as f:
        for line in f.readlines():
            #print(line)
            d = json.loads(line)
            index_map[d['key']] = d['word']
            char_map[d['word']] = d['key']
            #print(d['key'],d['word'])
    return char_map,index_map

In [4]:
def output_to_sequence(lmt ,vocab):
    ''' convert the output into sequences of characters or phonemes
    '''
   
    sequences = []
    start = 0
    sequences.append([])
    for i in range(len(lmt[0])):
        if lmt[0][i][0] == start:
            sequences[start].append(lmt[1][i])
        else:
            start = start + 1
            sequences.append([])

    #here, we only print the first sequence of batch
    indexes = sequences[0] #here, we only print the first sequence of batch

    if vocab != None:
        seq = []
        for ind in indexes:
             seq.append(vocab[ind])
        seq = ''.join(seq)
        return seq
    else:
        raise TypeError('vocabaray should be exists!')


In [5]:
@describe
def load_batched_data(mfccPath, labelPath, batchSize,maxTimeSteps=None,npz=False):
    '''returns 3-element tuple: batched data (list), maxTimeLength (int), and
       total number of samples (int)
       if maxTimeSteps not None ，then use max(maxTimeSteps,localmaxlen),temp use only,to be change per batch max
    '''
    if not npz:
        return data_lists_to_batches([np.load(os.path.join(mfccPath, fn)) for fn in os.listdir(mfccPath)],
                                 [np.load(os.path.join(labelPath, fn)) for fn in os.listdir(labelPath)],
                                 batchSize, maxTimeSteps) + \
                                (len(os.listdir(mfccPath)),)
    else:
        inputlist = []
        targetlist = []
        for fn in os.listdir(mfccPath):
            it = np.load(os.path.join(mfccPath, fn))
            inputlist.append(it['feature'])
            targetlist.append(it['label'])
        return data_lists_to_batches(inputlist,targetlist, batchSize, maxTimeSteps) +  (len(os.listdir(mfccPath)),)   

inputList为num-uttences个（num-feature，timeseqs）列表

In [6]:
def data_lists_to_batches(inputList, targetList, batchSize, maxTimeSteps=None):
    ''' padding the input list to a same dimension, integrate all data into batchInputs
    '''
    assert len(inputList) == len(targetList)
    # dimensions of inputList:batch*39*time_length
    print('data_lists_to_batches',len(inputList),inputList[0].shape)
    nFeatures = inputList[0].shape[0]
    maxLength = 0
    for inp in inputList:
        # find the max time_length
        maxLength = max(maxLength, inp.shape[1])

    if(maxTimeSteps):
        maxLength = max(maxLength, maxTimeSteps)
        
    # randIxs is the shuffled index from range(0,len(inputList))
    randIxs = np.random.permutation(len(inputList))
    start, end = (0, batchSize)
    dataBatches = []

    while end <= len(inputList):
        # batchSeqLengths store the time-length of each sample in a mini-batch
        batchSeqLengths = np.zeros(batchSize)

        # randIxs is the shuffled index of input list
        for batchI, origI in enumerate(randIxs[start:end]):
            batchSeqLengths[batchI] = inputList[origI].shape[-1]

        batchInputs = np.zeros((maxLength, batchSize, nFeatures))
        batchTargetList = []
        for batchI, origI in enumerate(randIxs[start:end]):
            # padSecs is the length of padding
            padSecs = maxLength - inputList[origI].shape[1]
            # numpy.pad pad the inputList[origI] with zeos at the tail
            batchInputs[:,batchI,:] = np.pad(inputList[origI].T, ((0,padSecs),(0,0)), 'constant', constant_values=0)
            # target label
            batchTargetList.append(targetList[origI])
        dataBatches.append((batchInputs, list_to_sparse_tensor(batchTargetList), batchSeqLengths))
        start += batchSize
        end += batchSize
    return (dataBatches, maxLength)


In [7]:
def list_to_sparse_tensor(targetList):
    ''' turn 2-D List to SparseTensor
    '''
    indices = [] #index
    vals = [] #value     
    for tI, target in enumerate(targetList):
        for seqI, val in enumerate(target):
            indices.append([tI, seqI])
            vals.append(val)
    shape = [len(targetList), np.asarray(indices).max(axis=0)[1]+1] #shape
    return (np.array(indices), np.array(vals), np.array(shape))
   

In [8]:
def build_multi_dynamic_brnn(args,
                             maxTimeSteps,
                             inputX,
                             cell_fn,
                             seqLengths,
                             time_major=True):
    hid_input = inputX
    print(hid_input.shape)
    # add conv 
    #with tf.variable_scope('conv1'):
    #	inpt = tf.reshape(inputX,[maxTimeSteps,args.batch_size,args.num_feature,1])
    #	conv1 = build_conv_layer(inpt,[3,3,1,1],1,name='conv1')
    #	hid_input = tf.reshape(conv1,[maxTimeSteps,args.batch_size,args.num_feature])
    #print(hid_input.shape)
    # end conv
    for i in range(args.num_layer):
        scope = 'DBRNN_' + str(i + 1)
        forward_cell = cell_fn(args.num_hidden, activation=args.activation)
        backward_cell = cell_fn(args.num_hidden, activation=args.activation)
        # tensor of shape: [max_time, batch_size, input_size]

        outputs, output_states = bidirectional_dynamic_rnn(forward_cell, backward_cell,
                                                           inputs=hid_input,
                                                           dtype=tf.float32,
                                                           sequence_length=seqLengths,
                                                           time_major=True,
                                                           scope=scope)
        # forward output, backward ouput
        # tensor of shape: [max_time, batch_size, input_size]
        output_fw, output_bw = outputs
        # forward states, backward states
        output_state_fw, output_state_bw = output_states
        

        # output_fb = tf.concat(2, [output_fw, output_bw])
        output_fb = tf.concat([output_fw, output_bw], 2)
        print(output_fb.shape)
        shape = output_fb.get_shape().as_list()
        print('shape:',shape[0],shape[1],shape[2])
        if(shape[0]):
            output_fb = tf.reshape(output_fb, [shape[0], shape[1], 2, int(shape[2] / 2)])
        else:
            output_fb = tf.reshape(output_fb, [-1, shape[1], 2, int(shape[2] / 2)])
        hidden = tf.reduce_sum(output_fb, 2)
        hidden = tf.contrib.layers.dropout(hidden, keep_prob=args.keep_prob, is_training=(True))
        
        print(scope)        
        print('inputs',hid_input)
        print('outputs',output_fw,output_bw)
        print('outputs_state',output_state_fw,output_state_bw)
        print('output_fb',output_fb,shape)
        
        if i != args.num_layer - 1:
            hid_input = hidden
        else:
            outputXrs = tf.reshape(hidden, [-1, args.num_hidden])
            print('outputXrs shape:',outputXrs.shape)
            # output_list = tf.split(0, maxTimeSteps, outputXrs)
            output_list = tf.split(outputXrs, maxTimeSteps, 0)
            print('output_list len:',len(output_list))
            fbHrs = [tf.reshape(t, [args.batch_size, args.num_hidden]) for t in output_list]
            
    return fbHrs


In [9]:
class dotdict(dict):
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

In [10]:
num_epochs = 1
batch_size=8
num_feature=39
num_hidden = 512
num_classes=686
num_layer=3

In [11]:
batchedData, maxTimeSteps, totalN = load_batched_data('atc_test_feature/train/0/','atc_test_feature/train/0/',batch_size,npz=True)

load_batched_data...
data_lists_to_batches 12148 (39, 109)
load_batched_data in 8.99689483643 s


In [23]:
devbatchedData, devmaxTimeSteps, devtotalN = load_batched_data('atc_test_feature/dev/0','atc_test_feature/dev/0',batch_size,maxTimeSteps,True)

load_batched_data...
data_lists_to_batches 1519 (39, 193)
load_batched_data in 1.03461289406 s


In [24]:
_,int_map = loadatcvocab('feature/atc/atcvocab.txt')

In [25]:
print(len(int_map))
print(len(batchedData),len(batchedData[0]),maxTimeSteps,totalN)
print(len(devbatchedData),len(devbatchedData[0]),devmaxTimeSteps,devtotalN)

685
1518 3 995 12148
189 3 995 1519


In [26]:
graph = tf.Graph()

args ={'level': 'cha',
              'rnncell': tf.contrib.rnn.GRUCell,
              'batch_size': batch_size,
              'num_hidden': num_hidden,
              'num_feature': num_feature,
              'num_class': num_classes,
              'num_layer': num_layer,
              'activation': tf.nn.relu,
              'optimizer': tf.train.AdamOptimizer,
              'learning_rate': 0.0001,
              'keep_prob': 0.9,
              'grad_clip': 1,
              'mode':'train'
            }
args = dotdict(args)

with graph.as_default():
    inputX = tf.placeholder(tf.float32,shape=(None, batch_size, num_feature),name='inputX')  
    inputXrs = tf.reshape(inputX, [-1, num_feature])
    #inputList = tf.split(inputXrs, maxTimeSteps, 0)  # convert inputXrs from [32*maxL,39] to [32,maxL,39]
    targetIxs = tf.placeholder(tf.int64,name='targetIxs')
    targetVals = tf.placeholder(tf.int32,name='targetVals')
    targetShape = tf.placeholder(tf.int64,name='targetShape')
    targetY = tf.SparseTensor(targetIxs, targetVals, targetShape)
    seqLengths = tf.placeholder(tf.int32, shape=(batch_size),name='seqLengths')
    
    fbHrs = build_multi_dynamic_brnn(args, maxTimeSteps, inputX, args.rnncell, seqLengths)  
    


(?, 8, 39)
(?, 8, 1024)
shape: None 8 1024
DBRNN_1
inputs Tensor("inputX:0", shape=(?, 8, 39), dtype=float32)
outputs Tensor("DBRNN_1/fw/fw/TensorArrayStack/TensorArrayGatherV3:0", shape=(?, 8, 512), dtype=float32) Tensor("ReverseSequence:0", shape=(?, 8, 512), dtype=float32)
outputs_state Tensor("DBRNN_1/fw/fw/while/Exit_2:0", shape=(?, 512), dtype=float32) Tensor("DBRNN_1/bw/bw/while/Exit_2:0", shape=(?, 512), dtype=float32)
output_fb Tensor("Reshape_1:0", shape=(?, 8, 2, 512), dtype=float32) [None, 8, 1024]
(?, 8, 1024)
shape: None 8 1024
DBRNN_2
inputs Tensor("Dropout/dropout/mul:0", shape=(?, 8, 512), dtype=float32)
outputs Tensor("DBRNN_2/fw/fw/TensorArrayStack/TensorArrayGatherV3:0", shape=(?, 8, 512), dtype=float32) Tensor("ReverseSequence_1:0", shape=(?, 8, 512), dtype=float32)
outputs_state Tensor("DBRNN_2/fw/fw/while/Exit_2:0", shape=(?, 512), dtype=float32) Tensor("DBRNN_2/bw/bw/while/Exit_2:0", shape=(?, 512), dtype=float32)
output_fb Tensor("Reshape_2:0", shape=(?, 8, 2, 

In [27]:
print(len(fbHrs))

995


In [28]:
with graph.as_default():
    with tf.name_scope('fc-layer'):
                with tf.variable_scope('fc'):
                    weightsClasses = tf.Variable(tf.truncated_normal([args.num_hidden, args.num_class], name='weightsClasses'))
                    biasesClasses = tf.Variable(tf.zeros([args.num_class]), name='biasesClasses')
                    logits = [tf.matmul(t, weightsClasses) + biasesClasses for t in fbHrs]
    print(len(logits),logits[0])                    
    logits3d = tf.stack(logits)
    print(logits3d)
    loss = tf.reduce_mean(tf.nn.ctc_loss(targetY, logits3d, seqLengths))
    var_op = tf.global_variables()
    var_trainable_op = tf.trainable_variables()    
    
    if args.grad_clip == -1:
        optimizer = tf.train.AdamOptimizer(args.learning_rate).minimize(loss)
    else:        
        grads, _ = tf.clip_by_global_norm(tf.gradients(loss, var_trainable_op), args.grad_clip)
        opti = tf.train.AdamOptimizer(args.learning_rate)
        optimizer = opti.apply_gradients(zip(grads, var_trainable_op))
    
    
    predictions = tf.to_int32(tf.nn.ctc_beam_search_decoder(logits3d, seqLengths, merge_repeated=False)[0][0])
    
    initial_op = tf.global_variables_initializer()
    
    errorRate = tf.reduce_sum(tf.edit_distance(predictions, targetY, normalize=True))

995 Tensor("fc-layer/fc/add:0", shape=(8, 686), dtype=float32)
Tensor("stack:0", shape=(995, 8, 686), dtype=float32)


In [None]:

with tf.Session(graph=graph) as sess:
    sess.run(initial_op)
    iters = 0
    for epoch in range(num_epochs):
        print('Epoch {}...'.format(epoch+1))
        batchErrors = np.zeros(len(batchedData))
        batchRandIxs = np.random.permutation(len(batchedData))
        #print(batchErrors.shape,batchRandIxs)
        train_start_time = time.time()
        for batch, batchOrigI in enumerate(batchRandIxs):           
            batchInputs, batchTargetSparse, batchSeqLengths = batchedData[batchOrigI]
            batchTargetIxs, batchTargetVals, batchTargetShape = batchTargetSparse
            #print(len(batchInputs),batchSeqLengths)
            feedDict = {inputX: batchInputs,
                        targetIxs: batchTargetIxs,
                        targetVals: batchTargetVals,
                        targetShape: batchTargetShape,
                        seqLengths: batchSeqLengths}
            #print(type(feedDict[inputX]),targetIxs,targetVals,targetShape,seqLengths)
            _, bloss, pre, y, er = sess.run([optimizer, loss,predictions, targetY, errorRate],feed_dict=feedDict)
            
            batchErrors[batch] = er
            iters +=1
            
            if batch % 10 == 0 and iters % 50 !=0 :  
                train_end_time = time.time()
                print('total:{},batch:{}/{},epoch:{}/{},usetime:{}s,train loss={:.3f},mean train CER={:.3f}'.format(
                                    totalN, batch+1, len(batchRandIxs), epoch+1, num_epochs, str(train_end_time-train_start_time),bloss, er/batch_size))
                train_start_time = time.time()
                   
            
            if iters % 50 == 0:     
                print('begin dev test....')
                devbatchnum = min(len(devbatchedData),10)
                devbatchErrors = np.zeros(devbatchnum)
                devbatchRandIxs = np.random.permutation(devbatchnum)
                avg_loss = 0.0                
                for dbatch in range(devbatchnum):
                    dbatchInputs, dbatchTargetSparse, dbatchSeqLengths = devbatchedData[devbatchRandIxs[dbatch]]
                    dbatchTargetIxs, dbatchTargetVals, dbatchTargetShape = dbatchTargetSparse
                    devfeedDict = {inputX: dbatchInputs,
                                 targetIxs: dbatchTargetIxs,
                                 targetVals: dbatchTargetVals,
                                 targetShape: dbatchTargetShape,
                                 seqLengths: dbatchSeqLengths}
                    dloss, dpre, dy, der = sess.run([loss, predictions,targetY,errorRate], feed_dict=devfeedDict)
                    devbatchErrors[dbatch] = der
                    avg_loss += dloss
                    if iters % 200 == 0:
                        print('Truth :' + output_to_sequence(dy,int_map))
                        print('Output:' + output_to_sequence(dpre,int_map))
                avg_loss /= devbatchnum
                avg_cer = np.mean(devbatchErrors)/ devbatchnum
                print('after {} batch dev,avg_loss:{},CER:{}'.format(devbatchnum,avg_loss,avg_cer))
            


Epoch 1...
total:12148,batch:1/1518,epoch:1/1,usetime:6.17603707314s,train loss=3098.043,mean train CER=17.799
total:12148,batch:11/1518,epoch:1/1,usetime:38.3824899197s,train loss=1143.410,mean train CER=3.494


In [None]:
print('ctc_lost parms:',targetY,logits3d,seqLengths)
print(type(optimizer),type(predictions),type(initial_op))
print(sess)

In [126]:
tf.summary.FileWriter('testDBiRNNlog',graph).close()

In [62]:
print(type(fbHrs),len(fbHrs),fbHrs[0])  

<type 'list'> 971 Tensor("Reshape_5:0", shape=(16, 512), dtype=float32)


In [27]:
print(inputX,inputXrs,inputList[0])

Tensor("inputX:0", shape=(971, 16, 39), dtype=float32) Tensor("Reshape_3:0", shape=(15536, 39), dtype=float32) Tensor("split_3:0", shape=(16, 39), dtype=float32)


In [28]:
print(targetIxs,targetVals,targetShape)
print(targetY,seqLengths)
print(seqLengths)

Tensor("targetIxs:0", dtype=int64) Tensor("targetVals:0", dtype=int32) Tensor("targetShape:0", dtype=int64)
SparseTensor(indices=Tensor("targetIxs:0", dtype=int64), values=Tensor("targetVals:0", dtype=int32), dense_shape=Tensor("targetShape:0", dtype=int64)) Tensor("seqLengths_1:0", shape=(16,), dtype=int32)
Tensor("seqLengths_1:0", shape=(16,), dtype=int32)


In [80]:
a = tf.placeholder(tf.float32,shape=[971,16,512])
b = tf.placeholder(tf.float32,shape=[971,16,512])


In [92]:
ab=tf.concat([a,b],2)
ab
shape = ab.get_shape().as_list()
shape
cd = tf.reshape(ab,[shape[0], shape[1], 2, int(shape[2] / 2)])

In [100]:

ef = tf.reduce_sum(cd,2)
cd

<tf.Tensor 'Reshape:0' shape=(971, 16, 2, 512) dtype=float32>

In [101]:
ef

<tf.Tensor 'Sum_4:0' shape=(971, 16, 512) dtype=float32>

In [102]:
aa=tf.contrib.layers.dropout(ef, keep_prob=0.9, is_training=(True))

In [103]:
aa

<tf.Tensor 'Dropout/dropout/mul:0' shape=(971, 16, 512) dtype=float32>