In [2]:
import numpy as np
import tensorflow as tf
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from tensorflow.python.ops import ctc_ops
from collections import Counter
import os, time, glob, json, torch

tf.compat.v1.disable_eager_execution()

tf.compat.v1.reset_default_graph()

In [3]:
physical_gpus = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_virtual_device_configuration(
    physical_gpus[0],
    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)] 
)
logical_gpus = tf.config.list_logical_devices("GPU")

2022-07-01 14:57:00.228866: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-07-01 14:57:00.288189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-01 14:57:00.288310: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 3090 computeCapability: 8.6
coreClock: 1.695GHz coreCount: 82 deviceMemorySize: 23.70GiB deviceMemoryBandwidth: 871.81GiB/s
2022-07-01 14:57:00.288495: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-07-01 14:57:00.289448: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-07-01 14:57:00.290372: I tensorflow/stream_executor/pl

In [4]:
#All
def get_wavs(wav_path):
    wavs = []
    for (dirpath, dirnames, filenames) in os.walk(wav_path):
        for filename in filenames:
            if filename.endswith('.wav') or filename.endswith('.WAV'):
                # print(filename)
                filename_path = os.path.join(dirpath, filename)
                # print(filename_path)
                wavs.append(filename_path)
    return wavs
 
 
# 获取wav文件对应的翻译文字
def get_tran_texts(wavs, tran_path):
    tran_texts = []
    for wav_file in wavs:
        basename = os.path.basename(wav_file)
        wav_filename = os.path.splitext(basename)[0]
        for tran_file in glob.glob(os.path.join(tran_path, wav_filename + '.json')):
            if os.path.exists(tran_file) is False:
                continue
            try:
                with open(tran_file, encoding='utf-8') as fd:
                    data = json.load(fd)
                    a = 0
                    b = 0
                    c = 0
                    d = 0
                    if data['command'] == 'open':
                        a = 1
                    elif data['command'] == 'close':
                        b = 1
                    elif data['command'] == 'hold':
                        c = 1
                    else:
                        d = 1
                    data = [a,b,c,d]
                    tran_texts.append(data)
                    fd.close()
            except ValueError:
                    print(tran_file)
    return tran_texts
 
 
# 获取wav和对应的翻译文字
def get_wavs_and_tran_texts(wav_path, tran_path):
    wavs = get_wavs(wav_path)
    tran_texts = get_tran_texts(wavs, tran_path)
 
    return wavs, tran_texts 

wav_path = 'dataset/data_sr/train'
label_file = 'dataset/data_sr/data'

wavs, labels = get_wavs_and_tran_texts(wav_path,label_file)

# Constants
SPACE_TOKEN = '<space>'
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # 0 is reserved to space

# 将稀疏矩阵的字向量转成文字
# tuple是sparse_tuple函数的返回值
def sparse_tuple_to_text(tuple, words):
    # 索引
    indices = tuple[0]
    # 字向量
    values = tuple[1]
    results = [''] * tuple[2][0]
    for i in range(len(indices)):
        index = indices[i][0]
        c = values[i]
        c = ' ' if c == SPACE_INDEX else words[c]
        results[index] = results[index] + c
 
    return results
 
 
# 将密集矩阵的字向量转成文字
def dense_to_text(value, words):
    results = ''
    for i in range(len(value)):
        results += words[value[i]]  # chr(value[i] + FIRST_INDEX)
    return results.replace('`', ' ')
 
 
# 创建序列的稀疏表示
def sparse_tuple(sequences, dtype=np.int32):
    indices = []
    values = []
 
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
 
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices.max(0)[1] + 1], dtype=np.int64)
 
    # return tf.SparseTensor(indices=indices, values=values, shape=shape)
    return indices, values, shape
 
 
# 将音频数据转为时间序列（列）和MFCC（行）的矩阵，将对应的译文转成字向量
def get_mfccs_and_transcriptch(txt_files, wavs, n_input, contexts, chars_map, txt_labels=None):
    audio = []
    audio_len = []
    transcript = []
    transcript_len = []
    if txt_files != None:
        txt_labels = txt_files
 
    for txt_obj, wav_file in zip(txt_labels, wavs):
        # load audio and convert to features
        audio_data = audiofile_to_input_vector(wav_file, n_input, contexts)
        try:
            audio_data = audio_data.astype('float32')
            # print(chars_map)
            audio.append(audio_data)
            audio_len.append(np.int32(len(audio_data)))

            # load text transcription and convert to numerical array
            target = []
            if txt_files != None:  # txt_obj是文件
                target = get_labels_vector(txt_obj, chars_map)
            else:
                target = get_labels_vector(None, chars_map, txt_obj)  # txt_obj是labels
            # target = text_to_char_array(target)
            transcript.append(target)
            transcript_len.append(len(target))
        except AttributeError:
            print(audio_data)
 
    audio = np.asarray(audio,dtype=object)
    audio_len = np.asarray(audio_len)
    transcript = np.asarray(transcript)
    transcript_len = np.asarray(transcript_len)
    return audio, audio_len, transcript, transcript_len
 
 
# 将字符转成向量，其实就是根据字找到字在chars_map中所应对的下标
def get_labels_vector(txt_file, chars_map, txt_label=None):
    words_size = len(chars_map)
 
    to_num = lambda word: chars_map.get(word, words_size)
 
    if txt_file != None:
        txt_label = get_ch_lable(txt_file)
 
    # print(txt_label)
    labels_vector = list(map(to_num, txt_label))
    # print(labels_vector)
    return labels_vector
 
 
def get_ch_lable(txt_file):
    labels = ""
    with open(txt_file, 'rb') as f:
        for label in f:
            # labels =label.decode('utf-8')
            labels = labels + label.decode('gb2312')
            # labels.append(label.decode('gb2312'))
 
    return labels
 
 
# 将音频信息转成MFCC特征
# 参数说明---audio_filename：音频文件   numcep：梅尔倒谱系数个数
#       numcontext：对于每个时间段，要包含的上下文样本个数
def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    # 加载音频文件
    try:
        fs, audio = wav.read(audio_filename)
        # 获取MFCC系数
        orig_inputs = mfcc(audio, samplerate=fs, numcep=numcep)
        # 打印MFCC系数的形状，得到比如(955, 26)的形状
        # 955表示时间序列，26表示每个序列的MFCC的特征值为26个
        # 这个形状因文件而异，不同文件可能有不同长度的时间序列，但是，每个序列的特征值数量都是一样的
        #print('orig_inputs shape', np.shape(orig_inputs))

        # 因为我们使用双向循环神经网络来训练,它的输出包含正、反向的结
        # 果,相当于每一个时间序列都扩大了一倍,所以
        # 为了保证总时序不变,使用orig_inputs =
        # orig_inputs[::2]对orig_inputs每隔一行进行一次
        # 取样。这样被忽略的那个序列可以用后文中反向
        # RNN生成的输出来代替,维持了总的序列长度。
        orig_inputs = orig_inputs[::2]  # (478, 26)
        # print(np.shape(orig_inputs))
        # 因为我们讲解和实际使用的numcontext=9，所以下面的备注我都以numcontext=9来讲解
        # 这里装的就是我们要返回的数据，因为同时要考虑前9个和后9个时间序列，
        # 所以每个时间序列组合了19*26=494个MFCC特征数
        train_inputs = np.array([], np.float32)
        train_inputs.resize((orig_inputs.shape[0], numcep + 2 * numcep * numcontext))
        #print('train_inputs shape', np.shape(train_inputs))#)(478, 494)

        # Prepare pre-fix post fix context
        empty_mfcc = np.array([])
        empty_mfcc.resize((numcep))

        # Prepare train_inputs with past and future contexts
        # time_slices保存的是时间切片，也就是有多少个时间序列
        time_slices = range(train_inputs.shape[0])

        # context_past_min和context_future_max用来计算哪些序列需要补零
        context_past_min = time_slices[0] + numcontext
        context_future_max = time_slices[-1] - numcontext

        # 开始遍历所有序列
        for time_slice in time_slices:
            # 对前9个时间序列的MFCC特征补0，不需要补零的，则直接获取前9个时间序列的特征
            need_empty_past = max(0, (context_past_min - time_slice))
            empty_source_past = list(empty_mfcc for empty_slots in range(need_empty_past))
            data_source_past = orig_inputs[max(0, time_slice - numcontext):time_slice]
            assert (len(empty_source_past) + len(data_source_past) == numcontext)

            # 对后9个时间序列的MFCC特征补0，不需要补零的，则直接获取后9个时间序列的特征
            need_empty_future = max(0, (time_slice - context_future_max))
            empty_source_future = list(empty_mfcc for empty_slots in range(need_empty_future))
            data_source_future = orig_inputs[time_slice + 1:time_slice + numcontext + 1]
            assert (len(empty_source_future) + len(data_source_future) == numcontext)

            # 前9个时间序列的特征
            if need_empty_past:
                past = np.concatenate((empty_source_past, data_source_past))
            else:
                past = data_source_past
            # 后9个时间序列的特征
            if need_empty_future:
                future = np.concatenate((data_source_future, empty_source_future))
            else:
                future = data_source_future

            # 将前9个时间序列和当前时间序列以及后9个时间序列组合
            past = np.reshape(past, numcontext * numcep)
            now = orig_inputs[time_slice]
            future = np.reshape(future, numcontext * numcep)

            train_inputs[time_slice] = np.concatenate((past, now, future))
            assert (len(train_inputs[time_slice]) == numcep + 2 * numcep * numcontext)

        # 将数据使用正太分布标准化，减去均值然后再除以方差
        train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)

        return train_inputs
    except ValueError:
        print(audio_filename)
    
 
#对齐处理
def pad_sequences(sequences, maxlen=None, dtype=np.float32,
                  padding='post', truncating='post', value=0.):
    #[478 512 503 406 481 509 422 465]
    lengths = np.asarray([len(s) for s in sequences], dtype=np.int64)
 
    nb_samples = len(sequences)
 
    #maxlen，该批次中，最长的序列长度
    if maxlen is None:
        maxlen = np.max(lengths)
 
    # 在下面的主循环中，从第一个非空序列中获取样本形状以检查一致性
    sample_shape = tuple()
    for s in sequences:
        if len(s) > 0:
            sample_shape = np.asarray(s).shape[1:]
            break
 
    x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype)
    for idx, s in enumerate(sequences):
        if len(s) == 0:
            continue  # 序列为空，跳过
 
        #post表示后补零，pre表示前补零
        if truncating == 'pre':
            trunc = s[-maxlen:]
        elif truncating == 'post':
            trunc = s[:maxlen]
        else:
            raise ValueError('Truncating type "%s" not understood' % truncating)
 
        # check `trunc` has expected shape
        trunc = np.asarray(trunc, dtype=dtype)
        if trunc.shape[1:] != sample_shape:
            raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' %
                             (trunc.shape[1:], idx, sample_shape))
 
        if padding == 'post':
            x[idx, :len(trunc)] = trunc
        elif padding == 'pre':
            x[idx, -len(trunc):] = trunc
        else:
            raise ValueError('Padding type "%s" not understood' % padding)
 
    return x, lengths

# 字表
all_words = []
for label in labels:
    #print(label)
    all_words += [word for word in label]
counter = Counter(all_words)
words = sorted(counter)
words_size= len(words)
chars_map = dict(zip(words, range(words_size)))
 
print('字表大小:', words_size)

# 梅尔倒谱系数的个数
n_input = 26
# 对于每个时间序列，要包含上下文样本的个数
contexts = 9
# batch大小
batch_size = 32
 

def next_batch(wavs, labels, start_idx=0, batch_size=1):
    filesize = len(labels)
    # 计算要获取的序列的开始和结束下标
    end_idx = min(filesize, start_idx + batch_size)
    idx_list = range(start_idx, end_idx)
    # 获取要训练的音频文件路径和对于的译文
    txt_labels = [labels[i] for i in idx_list]
    wavs = [wavs[i] for i in idx_list]
    # 将音频文件转成要训练的数据
    (source, audio_len, target, transcript_len) = get_mfccs_and_transcriptch(None,
                                                                             wavs,
                                                                             n_input,
                                                                             contexts, chars_map, txt_labels)
 
    start_idx += batch_size
    # Verify that the start_idx is not largVerify that the start_idx is not ler than total available sample size
    if start_idx >= filesize:
        start_idx = -1
 
    # Pad input to max_time_step of this batch
    # 如果多个文件将长度统一，支持按最大截断或补0
    source, source_lengths = pad_sequences(source)
    # 返回序列的稀疏表示
    sparse_labels = sparse_tuple(target)
 
    return start_idx, source, source_lengths, sparse_labels

b_stddev = 0.046875
h_stddev = 0.046875
 
n_hidden = 1024
n_hidden_1 = 1024
n_hidden_2 = 1024
n_hidden_5 = 1024
n_cell_dim = 1024
n_hidden_3 = 2 * 1024
 
keep_dropout_rate = 0.95
relu_clip = 20
 
"""
used to create a variable in CPU memory.
"""
def variable_on_cpu(name, shape, initializer):
    # Use the /cpu:0 device for scoped operations
    with tf.device('/CPU:0'):
        # Create or get apropos variable
        var = tf.compat.v1.get_variable(name=name, shape=shape, initializer=initializer)
    return var
 
 
def BiRNN_model(batch_x, seq_length, n_input, contexts, n_character, keep_dropout):
    # batch_x_shape: [batch_size, amax_stepsize, n_input + 2 * n_input * contexts]
    batch_x_shape = tf.shape(batch_x)
 
    # 将输入转成时间序列优先
    batch_x = tf.transpose(batch_x, [1, 0, 2])
    # 再转成2维传入第一层
    # [amax_stepsize * batch_size, n_input + 2 * n_input * contexts]
    batch_x = tf.reshape(batch_x, [-1, n_input + 2 * n_input * contexts])
 
    # 使用clipped RELU activation and dropout.
    # 1st layer
    with tf.name_scope('fc1'):
        b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer(stddev=b_stddev))
        h1 = variable_on_cpu('h1', [n_input + 2 * n_input * contexts, n_hidden_1],
                             tf.random_normal_initializer(stddev=h_stddev))
        layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)
        layer_1 = tf.nn.dropout(layer_1, keep_dropout)
 
    # 2nd layer
    with tf.name_scope('fc2'):
        b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer(stddev=b_stddev))
        h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer(stddev=h_stddev))
        layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)
        layer_2 = tf.nn.dropout(layer_2, keep_dropout)
 
    # 3rd layer
    with tf.name_scope('fc3'):
        b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer(stddev=b_stddev))
        h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer(stddev=h_stddev))
        layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)
        layer_3 = tf.nn.dropout(layer_3, keep_dropout)
 
    # 双向rnn
    with tf.name_scope('lstm'):
        # Forward direction cell:
        lstm_fw_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(n_cell_dim, forget_bias=1.0, state_is_tuple=True)
        lstm_fw_cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm_fw_cell,
                                                     input_keep_prob=keep_dropout)
        # Backward direction cell:
        lstm_bw_cell = tf.compat.v1.nn.rnn_cell.BasicLSTMCell(n_cell_dim, forget_bias=1.0, state_is_tuple=True)
        lstm_bw_cell = tf.compat.v1.nn.rnn_cell.DropoutWrapper(lstm_bw_cell,
                                                     input_keep_prob=keep_dropout)
 
        # `layer_3`  `[amax_stepsize, batch_size, 2 * n_cell_dim]`
        layer_3 = tf.reshape(layer_3, [-1, batch_x_shape[0], n_hidden_3])
 
        outputs, output_states = tf.compat.v1.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
                                                                 cell_bw=lstm_bw_cell,
                                                                 inputs=layer_3,
                                                                 dtype=tf.float32,
                                                                 time_major=True,
                                                                 sequence_length=seq_length)
 
        # 连接正反向结果[amax_stepsize, batch_size, 2 * n_cell_dim]
        outputs = tf.concat(outputs, 2)
        # to a single tensor of shape [amax_stepsize * batch_size, 2 * n_cell_dim]
        outputs = tf.reshape(outputs, [-1, 2 * n_cell_dim])
 
    with tf.name_scope('fc5'):
        b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer(stddev=b_stddev))
        h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer(stddev=h_stddev))
        layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)
        layer_5 = tf.nn.dropout(layer_5, keep_dropout)
 
    with tf.name_scope('fc6'):
        # 全连接层用于softmax分类
        b6 = variable_on_cpu('b6', [n_character], tf.random_normal_initializer(stddev=b_stddev))
        h6 = variable_on_cpu('h6', [n_hidden_5, n_character], tf.random_normal_initializer(stddev=h_stddev))
        layer_6 = tf.add(tf.matmul(layer_5, h6), b6)
 
    # 将2维[amax_stepsize * batch_size, n_character]转成3维 time-major [amax_stepsize, batch_size, n_character].
    layer_6 = tf.reshape(layer_6, [-1, batch_x_shape[0], n_character], name="pred")
    print('n_character:' + str(n_character))
    # Output shape: [amax_stepsize, batch_size, n_character]
    return layer_6
 
# input_tensor为输入音频数据，由前面分析可知，它的结构是[batch_size, amax_stepsize, n_input + (2 * n_input * contexts)]
#其中，batch_size是batch的长度，amax_stepsize是时序长度，n_input + (2 * n_input * contexts)是MFCC特征数，
#batch_size是可变的，所以设为None，由于每一批次的时序长度不固定，所有，amax_stepsize也设为None
input_tensor = tf.compat.v1.placeholder(tf.float32, [None, None, n_input + (2 * n_input * contexts)], name='input')
# Use sparse_placeholder; will generate a SparseTensor, required by ctc_loss op.
#targets保存的是音频数据对应的文本的系数张量，所以用sparse_placeholder创建一个稀疏张量
targets = tf.compat.v1.sparse_placeholder(tf.int32, name='label')
#seq_length保存的是当前batch数据的时序长度
seq_length = tf.compat.v1.placeholder(tf.int32, [None], name='seq_length')
#keep_dropout则是dropout的参数
keep_dropout= tf.compat.v1.placeholder(tf.float32, name="keep_dropout")
 
# logits is the non-normalized output/activations from the last layer.
# logits will be input for the loss function.
# nn_model is from the import statement in the load_model function
logits = BiRNN_model(input_tensor, tf.compat.v1.to_int64(seq_length), n_input, contexts, words_size + 1, keep_dropout)
 
# 使用ctc loss计算损失
avg_loss = tf.reduce_mean(ctc_ops.ctc_loss(targets, logits, seq_length))
 
# 优化器
learning_rate = 0.001
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate).minimize(avg_loss)
 
# 使用CTC decoder
with tf.name_scope("decode"):
    decoded, log_prob = ctc_ops.ctc_beam_search_decoder(logits, seq_length, merge_repeated=False)
 
# 计算编辑距离
with tf.name_scope("accuracy"):
    distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), targets)
    # 计算label error rate (accuracy)
    ler = tf.reduce_mean(distance, name='label_error_rate')
 
#迭代次数
epochs = 214
#模型保存地址
savedir = "model/"
#如果该目录不存在，新建
if os.path.exists(savedir) == False:
    os.mkdir(savedir)

# 生成saver
saver = tf.compat.v1.train.Saver(max_to_keep=1)
# 创建session
with tf.compat.v1.Session() as sess:
    #初始化
    sess.run(tf.compat.v1.global_variables_initializer())
    # 没有模型的话，就重新初始化
    kpt = tf.compat.v1.train.latest_checkpoint(savedir)
    print("kpt:", kpt)
    startepo = 0
    if kpt != None:
        saver.restore(sess, kpt)
        ind = kpt.find("-")
        startepo = int(kpt[ind + 1:])
        print(startepo)
 
    # 准备运行训练步骤
    section = '\n{0:=^40}\n'
    print(section.format('Run training epoch'))
 
    train_start = time.time()
    for epoch in range(epochs):  # 样本集迭代次数
        epoch_start = time.time()
        if epoch < startepo:
            continue
 
        print("epoch start:", epoch, "total epochs= ", epochs)
        #######################run batch####
        n_batches_per_epoch = int(np.ceil(len(labels) / batch_size))
        print("total loop ", n_batches_per_epoch, "in one epoch，", batch_size, "items in one loop")
 
        train_cost = 0
        train_ler = 0
        next_idx = 0
 
        for batch in range(n_batches_per_epoch):  # 一次batch_size，取多少次
            # 取数据
            next_idx, source, source_lengths, sparse_labels = next_batch(wavs,labels,next_idx ,batch_size)

            feed = {input_tensor: source, targets: sparse_labels, seq_length: source_lengths,
                    keep_dropout: keep_dropout_rate}
 
            # 计算 avg_loss optimizer ;
            batch_cost, _ = sess.run([avg_loss, optimizer], feed_dict=feed)
            train_cost = batch_cost
            #验证模型的准确率，比较耗时，我们训练的时候全力以赴，所以这里先不跑
            if (batch + 1) % 20 == 0:
                print('loop:', batch, 'Train cost: ', train_cost / (batch + 1))
                feed2 = {input_tensor: source, targets: sparse_labels, seq_length: source_lengths, keep_dropout: 1.0}
                d, train_ler = sess.run([decoded[0], ler], feed_dict=feed2)
                #dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=sess)
                #dense_labels = sparse_tuple_to_text(sparse_labels, words)
            
                counter = 0
                print('Label err rate: ', train_ler)
                for orig, decoded_arr in zip(dense_labels, dense_decoded):
                    # convert to strings
                    decoded_str = dense_to_text(decoded_arr, words)
                    print(' file {}'.format(counter))
                    print('Original: {}'.format(orig))
                    print('Decoded:  {}'.format(decoded_str))
                    counter = counter + 1
                    break
 
            #每训练100次保存一下模型
            if (batch + 1) % 100 == 0:
                saver.save(sess, savedir + "saver.cpkt", global_step=epoch)
            epoch_duration = time.time() - epoch_start
    
            log = 'Epoch {}/{}, batch: {}, train_cost: {:.3f}, train_ler: {:.3f}, time: {:.2f} sec'
            print(log.format(epoch, epochs, str(batch), train_cost, train_ler, epoch_duration))
 
 
    train_duration = time.time() - train_start
    print('Training complete, total duration: {:.2f} min'.format(train_duration / 60))

dataset/data_sr/data/1618243237.json
dataset/data_sr/data/1618243179.json
dataset/data_sr/data/1618243233.json
字表大小: 2
Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
n_character:3


2022-07-01 14:57:01.953667: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-01 14:57:01.953768: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 3090 computeCapability: 8.6
coreClock: 1.695GHz coreCount: 82 deviceMemorySize: 23.70GiB deviceMemoryBandwidth: 871.81GiB/s
2022-07-01 14:57:01.953808: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-07-01 14:57:01.953819: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-07-01 14:57:01.953829: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2022-07-01 14:57:01.953838: I tensorflow/stream_executor/

kpt: model/saver.cpkt-213
INFO:tensorflow:Restoring parameters from model/saver.cpkt-213
213


epoch start: 213 total epochs=  214
total loop  576 in one epoch， 32 items in one loop


2022-07-01 15:01:02.577497: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10


Epoch 213/214, batch: 0, train_cost: 2.221, train_ler: 0.000, time: 80.29 sec
Epoch 213/214, batch: 1, train_cost: 2.443, train_ler: 0.000, time: 80.79 sec
Epoch 213/214, batch: 2, train_cost: 1.780, train_ler: 0.000, time: 81.30 sec
Epoch 213/214, batch: 3, train_cost: 2.052, train_ler: 0.000, time: 81.79 sec
Epoch 213/214, batch: 4, train_cost: 1.916, train_ler: 0.000, time: 82.29 sec
Epoch 213/214, batch: 5, train_cost: 2.169, train_ler: 0.000, time: 82.79 sec
Epoch 213/214, batch: 6, train_cost: 2.310, train_ler: 0.000, time: 83.29 sec
Epoch 213/214, batch: 7, train_cost: 3.257, train_ler: 0.000, time: 83.79 sec
Epoch 213/214, batch: 8, train_cost: 2.528, train_ler: 0.000, time: 84.28 sec
Epoch 213/214, batch: 9, train_cost: 1.672, train_ler: 0.000, time: 84.78 sec
Epoch 213/214, batch: 10, train_cost: 1.871, train_ler: 0.000, time: 85.29 sec
Epoch 213/214, batch: 11, train_cost: 2.877, train_ler: 0.000, time: 85.79 sec
Epoch 213/214, batch: 12, train_cost: 2.823, train_ler: 0.000,

NameError: name 'dense_labels' is not defined