文本生成

参照 [官网tutorials](https://www.tensorflow.org/tutorials/text/text_generation#%E4%B8%8B%E8%BD%BD%E8%8E%8E%E5%A3%AB%E6%AF%94%E4%BA%9A%E6%95%B0%E6%8D%AE%E9%9B%86)

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
%matplotlib inline
from tqdm.auto import tqdm
import concurrent.futures
from multiprocessing import Pool
import copy,os,sys,psutil
from collections import Counter,deque
import itertools
import os

In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Switch TensorFlow to eager execution for the rest of the session.
# NOTE(review): other cells in this notebook use tf.Session() and
# tf.keras.backend.get_session() together with sess.run(...) — confirm whether
# this cell is meant to be executed in the same session as those cells.
tf.enable_eager_execution()

In [3]:
# Shakespeare dataset: fetch the text file through Keras' get_file helper and
# keep the local path it returns.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
# Read the raw bytes and decode them explicitly (py2-compatible approach kept
# from the original). The context manager closes the file handle, which the
# original one-liner left open.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# Sorted list of the distinct characters that occur in the text.
vocab = sorted(set(text))

# Map every distinct character to its index in `vocab`, and back again.
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
# Encode the whole text as an array of character indices.
text_as_int = np.array([char2idx[c] for c in text])

In [4]:
# Text length is the number of characters in the text
print ('>>> Length of text: {} characters'.format(len(text)))

# Number of distinct characters in the text
print ('>>> vocab size: {} unique characters'.format(len(vocab)))

# Take a look at the first 250 characters of the text
print(">>> show head 250 characters\n",text[:250])


>>> Length of text: 1115394 characters
>>> vocab size: 65 unique characters
>>> show head 250 characters
 First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



向量化文本

In [6]:
# Show the integer-encoded text together with its shape.
print(f">>> text_as_int: shape {text_as_int.shape}\n",text_as_int)
print(">>> top10 of char2idx:")
# NOTE(review): enumerate starts at 0, so `idx <= 10` prints 11 entries even
# though the heading says "top10"; the loop also walks the whole dict.
for idx,(k,v) in enumerate(char2idx.items()):
    if idx <= 10:
        print(f"{repr(k):4s}:'{v:2d}'")
print(">>> top10 of idx2char:")
# First 10 entries of the index -> character lookup array.
for i in range(10):
    print(f"{i} : {repr(idx2char[i])}")

# Show the integer mapping of the first 13 characters of the text
print(">>> 显示文本首 13 个字符的整数映射")
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

>>> text_as_int: shape (1115394,)
 [18 47 56 ... 45  8  0]
>>> top10 of char2idx:
'\n':' 0'
' ' :' 1'
'!' :' 2'
'$' :' 3'
'&' :' 4'
"'" :' 5'
',' :' 6'
'-' :' 7'
'.' :' 8'
'3' :' 9'
':' :'10'
>>> top10 of idx2char:
0 : '\n'
1 : ' '
2 : '!'
3 : '$'
4 : '&'
5 : "'"
6 : ','
7 : '-'
8 : '.'
9 : '3'
>>> 显示文本首 13 个字符的整数映射
'First Citizen' ---- characters mapped to int ---- > [18 47 56 57 58  1 15 47 58 47 64 43 52]


给定一个字符或者一个字符序列，下一个最可能出现的字符是什么？
> 将文本拆分为长度为 seq_length+1 的文本块。例如，假设 seq_length 为 4 而且文本为 “Hello”， 那么输入序列将为 “Hell”，目标序列将为 “ello”。

# 直接拷贝手册

## 数据

In [7]:
# Maximum length (in characters) of each input sequence
seq_length = 100
# Number of whole seq_length-sized pieces that fit in the text (integer division)
examples_per_epoch = len(text)//seq_length

def split_input_target(chunk):
    """Split one chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, e.g. "Hello" -> ("Hell", "ello").
    """
    return chunk[:-1], chunk[1:]


# Create training examples / targets
# One dataset element per encoded character.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
# Group characters into chunks of seq_length+1; an incomplete final chunk is dropped.
sequences_dataset = char_dataset.batch(seq_length+1, drop_remainder=True)
# Turn every chunk into an (input, target) pair.
dataset = sequences_dataset.map(split_input_target)


# Batch size
BATCH_SIZE = 64
# Buffer size used to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it does not try to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements.)
BUFFER_SIZE = 10000
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
# Bare expression: displays the dataset object in the notebook.
dataset

<DatasetV1Adapter shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

## 模型

注意到这里`tf.keras.layers.Embedding`的参数`batch_input_shape=[batch_size, None]`，实际是指定了`batch_size`而省略了`seq_length`，也就是模型支持任意长度的句子，后面在恢复模型时使用`model.build(tf.TensorShape([1, None]))`也是只把`batch_size`指定为1，支持任意长度的句子

In [5]:
# Number of distinct characters (size of the vocabulary)
vocab_size = len(vocab)
# Dimension of the embedding vectors
embedding_dim = 256
# Number of units in the recurrent layer
rnn_units = 1024
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character model: Embedding -> stateful GRU -> Dense.

    Args:
        vocab_size: size of the embedding table and of the final Dense layer.
        embedding_dim: width of each embedding vector.
        rnn_units: number of units in the GRU layer.
        batch_size: fixed batch dimension of the input; the second (sequence)
            dimension is left as None.

    Returns:
        The tf.keras.Sequential model built from the three layers.
    """
    # Only the batch dimension is pinned; the sequence length stays None.
    embedding = tf.keras.layers.Embedding(
        vocab_size, embedding_dim, batch_input_shape=[batch_size, None])
    # return_sequences=True yields one output per input position.
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform')
    # One output value per vocabulary entry; no activation is specified.
    projection = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, recurrent, projection])

In [8]:
# Build the training model with the training batch size, then print its layer summary.
model = build_model(vocab_size = len(vocab),embedding_dim=embedding_dim,rnn_units=rnn_units,batch_size=BATCH_SIZE)
model.summary()

def loss(labels, logits):
    """Sparse categorical cross-entropy computed with from_logits=True.

    Args:
        labels: target class indices.
        logits: model outputs, passed on with from_logits=True.

    Returns:
        Whatever tf.keras.losses.sparse_categorical_crossentropy returns for
        these arguments.
    """
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy
    return cross_entropy(labels, logits, from_logits=True)

# Compile with the 'adam' optimizer and the custom `loss` function.
model.compile(optimizer='adam', loss=loss)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3935232   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


## 训练

In [7]:
# Directory where checkpoints are saved
checkpoint_dir = './tmp/NLG_ckpt'

# Checkpoint file name; "{epoch}" is left in the path as a placeholder
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Checkpoint callback configured with save_weights_only=True
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

EPOCHS=40
# Train on `dataset` for EPOCHS epochs with the checkpoint callback attached.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

NameError: name 'model' is not defined

## 生成文本

恢复模型

In [120]:
# Hyper-parameters of the model to restore.
# NOTE(review): vocab_size is hard-coded to 65 here while other cells use
# len(vocab) — confirm the two agree for the checkpoint being loaded.
vocab_size=65
embedding_dim=256
rnn_units=1024

checkpoint_dir = './tmp/NLG_ckpt'
# NOTE(review): tf.train.latest_checkpoint is documented to return None when no
# checkpoint is found — presumably load_weights would then fail; verify.
latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
print(f">>> latest ckpt: '{latest_ckpt}'")

# Rebuild the same architecture with batch_size=1 for generation.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Bare expression: its return value is displayed by the notebook.
model.load_weights(latest_ckpt)

# Batch dimension fixed to 1, sequence length left as None.
model.build(tf.TensorShape([1, None]))
model.summary()

>>> latest ckpt: './tmp/NLG_ckpt/ckpt_30'


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f650fe08b38>

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            16640     
_________________________________________________________________
gru_1 (GRU)                  (1, None, 1024)           3935232   
_________________________________________________________________
dense_1 (Dense)              (1, None, 65)             66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Fetch the session used by the Keras backend and display it.
k_sess = tf.keras.backend.get_session()
k_sess

<tensorflow.python.client.session.Session at 0x7f665ae85eb8>

In [399]:
def pick_from_top_n(preds_, top_n=None, random=False, verbose=False):
    """Pick a character index from a vector of unnormalised scores (logits).

    The scores are flattened, optionally restricted to the ``top_n`` largest
    entries, and converted into probabilities with a numerically stable
    softmax. The caller's array is never modified.

    Compared with the previous implementation:
      * negative logits are no longer clamped to 0 before the softmax, so the
        relative ordering and spacing of the scores is preserved;
      * entries outside the top ``top_n`` are masked with ``-inf`` instead of
        ``0`` and therefore get exactly zero probability;
      * the number of candidates is taken from the flattened scores, so input
        of shape ``(1, vocab)`` works as well as ``(vocab,)``.

    Args:
        preds_: array-like of scores for one time step.
        top_n: number of highest-scoring candidates kept for sampling;
            ``None`` keeps all of them.
        random: if True, sample an index from the softmax distribution;
            if False, return the index of the largest score.
        verbose: if True, print up to five ``[probability, index]`` rows in
            descending probability order (zero-probability rows are dropped).

    Returns:
        The chosen index as a NumPy integer.

    Raises:
        ValueError: if ``preds_`` is empty or ``top_n`` is smaller than 1.
    """
    # Flattened float64 working copy; the input array stays untouched.
    logits = np.array(preds_, dtype=np.float64).reshape(-1)
    if logits.size == 0:
        raise ValueError("preds_ must contain at least one score")
    if top_n is None:
        top_n = logits.size
    elif top_n < 1:
        raise ValueError("top_n must be a positive integer or None")

    # Greedy choice is taken before masking; masking never removes the maximum.
    greedy_choice = np.argmax(logits)

    if top_n < logits.size:
        # -inf becomes exp(-inf) == 0, i.e. the entry can never be sampled.
        logits[np.argsort(logits)[:-top_n]] = -np.inf

    # Softmax with the maximum subtracted for numerical stability.
    weights = np.exp(logits - logits.max())
    probs = weights / weights.sum()

    if verbose:
        # Pair each probability with its index, highest probability first.
        order = np.argsort(probs)[::-1]
        prob_idx = np.stack([probs[order], order], axis=1)
        # Drop the zero-probability rows.
        prob_idx = prob_idx[prob_idx[:, 0] > 0]
        print(prob_idx[:5])

    # Either sample from the distribution or take the most likely character.
    if random:
        return np.random.choice(probs.size, 1, p=probs)[0]
    return greedy_choice

def generate_text(model, start_string, num_generate=20, temperature=1.0, random=True):
    """Generate `num_generate` characters that continue `start_string`.

    Args:
        model: model exposing reset_states() and predict(); it is fed a
            batch containing a single sequence.
        start_string: seed text; each character is looked up in `char2idx`.
        num_generate: number of characters to generate.
        temperature: divisor applied to the model output before picking.
            Lower temperatures give more predictable text, higher ones more
            surprising text; experiment to find a good setting.
        random: passed through to `pick_from_top_n`; when False a warning is
            printed because `temperature` will not take effect.

    Returns:
        `start_string` followed by the generated characters.
    """
    if not random:
        print("[WARN] without random.choice, 'temperature' will not take effect")

    # Vectorise the seed string and add a leading batch axis.
    model_input = np.expand_dims([char2idx[ch] for ch in start_string], 0)

    # Generated characters are collected here and joined at the end.
    generated_chars = []

    # Batch size is 1 here.
    model.reset_states()
    for _ in tqdm(range(num_generate)):
        # Remove the batch axis, then scale the scores by the temperature.
        scaled = np.squeeze(model.predict(model_input), 0) / temperature
        # Only the scores of the last position decide the next character.
        next_id = pick_from_top_n(scaled[-1], top_n=None, random=random, verbose=False)
        generated_chars.append(idx2char[next_id])
        # The picked character becomes the next (single-character) input.
        model_input = np.expand_dims([next_id], 0)

    return start_string + ''.join(generated_chars)


In [404]:
# Print floats without scientific notation.
np.set_printoptions(suppress=True)
# print(generate_text(model, start_string=u"ROME", num_generate=300, temperature=0.7))
# Sampled generation at temperature 0.1.
print(generate_text(model, start_string=u"ROME", num_generate=100, temperature=0.1, random=True))
print("~"*20)
# Generation with random=False.
print(generate_text(model, start_string=u"ROME", num_generate=100, temperature=1.0, random=False))

HBox(children=(IntProgress(value=0), HTML(value='')))


ROMER:
So do I too, if it be so, for I must go
To keep the man of some strip star her riches standing;
B
~~~~~~~~~~~~~~~~~~~~
[WARN] without random.choice, 'temperature' will not take effect


HBox(children=(IntProgress(value=0), HTML(value='')))


ROMER:
So do I too, if it be so, for I must go
To keep the prince his son and heir: he is come from him.


In [34]:
# Evaluation step (generate text with the trained model)
def generate_text_custom(model, start_string, temperature=1.0, num_generate=1000, verbose=True):
    """Generate text with TensorFlow ops evaluated through the Keras session.

    Fix over the previous version: the verbose branch used to call
    ``model(input_eval)`` a second time, on the *next* input, only to print a
    shape. That was an extra invocation of a stateful model per step and it
    reported the shape of the wrong tensor. The shape printed now is the one
    of the prediction actually used in this step.

    Args:
        model: callable model exposing reset_states(); it is fed a batch
            containing a single sequence.
        start_string: seed text; each character is looked up in `char2idx`.
        temperature: divisor applied to the model output before sampling.
            Lower temperatures give more predictable text, higher ones more
            surprising text; experiment to find a good setting.
        num_generate: number of characters to generate.
        verbose: if True, print the intermediate values of every step.

    Returns:
        `start_string` followed by the generated characters.
    """
    # Convert the start string to numbers (vectorise) and add a batch axis.
    input_eval_raw = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval_raw, 0)
    if verbose:
        print(f">>> 输入是: '{start_string}'")
        print(f">>> +向量化: {input_eval_raw}")
        print(f">>> +维度校正: {input_eval}")

    # Generated characters are collected here and joined at the end.
    text_generated = []

    # NOTE(review): new TensorFlow ops are created on every loop iteration and
    # evaluated with k_sess.run — presumably this function is meant for graph
    # mode only; confirm, and confirm that the stateful layer's state is
    # actually carried between iterations in that mode.
    k_sess = tf.keras.backend.get_session()

    # Batch size is 1 here; clear the hidden state of the stateful layer.
    model.reset_states()
    for _ in tqdm(range(num_generate), desc="predict next char"):
        raw_predictions = model(input_eval)
        # Remove the batch dimension.
        predictions = tf.squeeze(raw_predictions, 0)

        # Scale by the temperature, then sample one id per input position.
        predictions = predictions / temperature
        predicted_seq = tf.random.categorical(predictions, num_samples=1)
        predicted_seq = k_sess.run(predicted_seq)

        # Only the sample for the last position is kept.
        predicted_id = predicted_seq[-1, 0]
        text_generated.append(idx2char[predicted_id])

        # The sampled character becomes the next (single-character) input.
        input_eval = tf.expand_dims([predicted_id], 0)
        if verbose:
            print(f">>> 模型prediction结果shape: {raw_predictions.shape} --squeeze at 0--> {predictions.shape}")
            print(f"    对应输入的预测字符串是: {predicted_seq.ravel()}")
            print(f"    +decode: {repr(''.join([idx2char[i] for i in predicted_seq.ravel()]))}")
            print(f"    +取最后一个字母: '{predicted_id}'")

    return (start_string + ''.join(text_generated))

In [48]:
# Generate 10 characters after the seed "ROMEO: " without per-step logging.
res = generate_text_custom(model, start_string=u"ROMEO: ", temperature=1.0, num_generate=10,verbose=False)
# Show the result both as a repr (escapes visible) and as plain text.
print(f">>> 最后输出的结果是: {repr(res)}")
print(res)

HBox(children=(IntProgress(value=0, description='predict next char', max=10, style=ProgressStyle(description_w…

>>> 最后输出的结果是: 'ROMEO: n IOFLANEN'
ROMEO: n IOFLANEN


# 预览

In [111]:
# Pull one (input, target) batch out of the dataset through a one-shot iterator
# evaluated in a tf.Session.
iterator=dataset.make_one_shot_iterator()
with tf.Session() as sess:
    input_example, target_example = sess.run(iterator.get_next())
    print (f'>>> Input data: {input_example.shape} 演示的是字符索引映射回字符并join成字符串\n')
    # Map indices back to characters and join each row into one string.
    np.array(["".join(i) for i in idx2char[input_example]])
    print (f'>>> Target data: {target_example.shape} 演示的是字符索引映射回字符并join成字符串\n')
    np.array(["".join(i) for i in idx2char[target_example]])
    print(f">>> input: {input_example.shape}\n",input_example,f"\n>>> output: {target_example.shape}\n",target_example)
    

print(">>> 模拟训练过程中的输入与label (演示每个batch的前五个)")
# For every row of the batch, pair the first five input characters with the
# corresponding target characters.
for i, (input_idx, target_idx) in enumerate(zip(input_example[:, :5], target_example[:, :5])):
    # Three columns: input character, a fixed "--RNN->" marker, target character.
    res = np.transpose(np.stack([idx2char[input_idx],np.full(idx2char[input_idx].shape, "--RNN->"),idx2char[target_idx]]))
    print(f"  Step-{i} expect:\n",res)


>>> Input data: (4, 20) 演示的是字符索引映射回字符并join成字符串



array(['ter thought of, a li', ' grant may never\nBe ',
       's was\nA worthy offic', ' my hands\nOf this mo'], dtype='<U20')

>>> Target data: (4, 20) 演示的是字符索引映射回字符并join成字符串



array(['er thought of, a lit', 'grant may never\nBe h',
       ' was\nA worthy office', 'my hands\nOf this mos'], dtype='<U20')

>>> input: (4, 20)
 [[58 43 56  1 58 46 53 59 45 46 58  1 53 44  6  1 39  1 50 47]
 [ 1 45 56 39 52 58  1 51 39 63  1 52 43 60 43 56  0 14 43  1]
 [57  1 61 39 57  0 13  1 61 53 56 58 46 63  1 53 44 44 47 41]
 [ 1 51 63  1 46 39 52 42 57  0 27 44  1 58 46 47 57  1 51 53]] 
>>> output: (4, 20)
 [[43 56  1 58 46 53 59 45 46 58  1 53 44  6  1 39  1 50 47 58]
 [45 56 39 52 58  1 51 39 63  1 52 43 60 43 56  0 14 43  1 46]
 [ 1 61 39 57  0 13  1 61 53 56 58 46 63  1 53 44 44 47 41 43]
 [51 63  1 46 39 52 42 57  0 27 44  1 58 46 47 57  1 51 53 57]]
>>> 模拟训练过程中的输入与label (演示每个batch的前五个)
  Step-0 expect:
 [['t' '--RNN->' 'e']
 ['e' '--RNN->' 'r']
 ['r' '--RNN->' ' ']
 [' ' '--RNN->' 't']
 ['t' '--RNN->' 'h']]
  Step-1 expect:
 [[' ' '--RNN->' 'g']
 ['g' '--RNN->' 'r']
 ['r' '--RNN->' 'a']
 ['a' '--RNN->' 'n']
 ['n' '--RNN->' 't']]
  Step-2 expect:
 [['s' '--RNN->' ' ']
 [' ' '--RNN->' 'w']
 ['w' '--RNN->' 'a']
 ['a' '--RNN->' 's']
 ['s' '--RNN->' '\n']]
  Step-3 expect:
 [[' ' '--RNN->' 'm']
 ['

In [82]:
# Size of the vocabulary
vocab_size = len(vocab)

# Dimension of the embedding
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

# Same three-layer stack as build_model, written inline.
# NOTE(review): `batch_size` is not assigned anywhere in the visible code
# (only BATCH_SIZE is) — presumably it was defined in a cell that is not shown; verify.
model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units, return_sequences=True, stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
      ])
model.summary()
# The important part of this loss is from_logits=True (the default is False)
def loss(labels, logits):
    """Sparse categorical cross-entropy computed with from_logits=True.

    Args:
        labels: target class indices.
        logits: model outputs, passed on with from_logits=True.

    Returns:
        Whatever tf.keras.losses.sparse_categorical_crossentropy returns for
        these arguments.
    """
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy
    return cross_entropy(labels, logits, from_logits=True)
# Compile with the 'adam' optimizer, the custom `loss` function and the 'acc' metric.
model.compile(optimizer='adam', loss=loss,metrics=['acc'])
# Call the model on the example input batch and display the shape of the result.
example_batch_predictions = model(input_example)
example_batch_predictions.shape

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (4, None, 256)            16640     
_________________________________________________________________
gru_5 (GRU)                  (4, None, 1024)           3935232   
_________________________________________________________________
dense_5 (Dense)              (4, None, 65)             66625     
Total params: 4,018,497
Trainable params: 4,018,497
Non-trainable params: 0
_________________________________________________________________


TensorShape([Dimension(4), Dimension(20), Dimension(65)])

In [67]:
# Checkpoint directory and file name pattern ("{epoch}" is left as a placeholder).
ckpt_dir = "./tmp/NLG_ckpt"
ckpt_path = os.path.join(ckpt_dir, "ckpt_{epoch}")
# Checkpoint callback configured with save_weights_only=True.
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=ckpt_path, save_weights_only=True)

In [110]:
# Bare expression: displays the current dataset object in the notebook.
dataset

<DatasetV1Adapter shapes: ((4, 20), (4, 20)), types: (tf.int64, tf.int64)>

<font style='color:red'> 这里获取session必须在模型搭建完之后 </font>
否则会报错:
> Error while reading resource variable xxx/xxx from Container: localhost

看看随机初始化的模型结果

In [61]:
def pick_top_n(preds_, top_n=None, random=False):
    """Pick an index from a vector of scores treated as non-negative weights.

    With ``random=False`` the index of the largest score is returned. With
    ``random=True`` negative scores are set to 0, only the ``top_n`` largest
    weights are kept, the weights are normalised to sum to 1 and an index is
    sampled from that distribution. The caller's array is never modified.

    Compared with the previous implementation:
      * the number of candidates is taken from the flattened scores rather
        than ``len(preds_)``, so input of shape ``(1, vocab)`` no longer
        collapses ``top_n`` to 1 or fails inside ``np.random.choice``;
      * a zero total weight raises a clear ``ValueError`` instead of dividing
        by zero and passing NaN probabilities to ``np.random.choice``.

    Args:
        preds_: array-like of scores for one time step.
        top_n: number of highest-scoring candidates kept for sampling;
            ``None`` keeps all of them.
        random: if True, sample from the normalised weights; if False, return
            the index of the largest score.

    Returns:
        The chosen index as a NumPy integer.

    Raises:
        ValueError: if ``preds_`` is empty, ``top_n`` is smaller than 1, or
            sampling is requested but no score is positive.
    """
    # Flattened float64 working copy; the input array stays untouched.
    scores = np.array(preds_, dtype=np.float64).reshape(-1)
    if scores.size == 0:
        raise ValueError("preds_ must contain at least one score")
    if top_n is None:
        top_n = scores.size
    elif top_n < 1:
        raise ValueError("top_n must be a positive integer or None")

    if not random:
        # Greedy choice does not depend on clamping or top-n filtering.
        return np.argmax(scores)

    # Values below 0 are set to 0.
    weights = np.where(scores >= 0, scores, 0)
    if top_n < weights.size:
        # Zero out everything except the top_n largest weights.
        weights[np.argsort(weights)[:-top_n]] = 0

    total = weights.sum()
    if total <= 0:
        raise ValueError("cannot sample: no candidate has a positive score")

    # Normalise to probabilities and draw a single index.
    return np.random.choice(weights.size, 1, p=weights / total)[0]
# Evaluate the model on the example batch through the Keras backend session.
k_sess = tf.keras.backend.get_session()
pred = k_sess.run(model(input_example))
# For every position pick one index with pick_top_n; random defaults to False,
# so this is the argmax of each score vector.
chose = np.array([[pick_top_n(char_probs) for char_probs in sequences] for sequences in pred ])
print(">>> 模型随机初始化时的predict结果 (只取第一句看效果)")
# The bare f-strings and expressions below are not printed explicitly; they are
# shown only because the notebook is configured with ast_node_interactivity = "all".
f"输入：[idx2char]:'{''.join(idx2char[input_example][0]):s}' [idx]:'{input_example[0]}'"
f"目标：[idx2char]:'{''.join(idx2char[target_example][0]):s}' [idx]:'{target_example[0]}'"
f"从输出的概率分布取top：[idx2char]:'{''.join(idx2char[chose][0]):s}' [idx]:'{chose[0]}'"
f"直接输出：[idx2char]:"
pred[0]
f"损失：[CE-loss]:"
# Cross-entropy for the first sequence of the batch only.
k_sess.run(tf.keras.losses.sparse_categorical_crossentropy(target_example[0], pred[0], from_logits=True))

print(">>> 下面是详细")
print(f"输入: {input_example.shape}")
idx2char[input_example]
print(f"标注: {target_example.shape}")
idx2char[target_example]
print(f"模型预测: {pred.shape} --pick--> {chose.shape}")
idx2char[chose]
# NOTE(review): this rebinds the name `loss`, which other cells define as the
# loss function passed to model.compile — re-running those cells afterwards
# would pick up this array instead unless the function is redefined first.
loss = k_sess.run(tf.keras.losses.sparse_categorical_crossentropy(target_example, pred, from_logits=True))
print(f"CE损失: [shape]:{loss.shape}\n",loss)

>>> 模型随机初始化时的predict结果 (只取第一句看效果)


"输入：[idx2char]:'hat, what? let's par' [idx]:'[46 39 58  6  1 61 46 39 58 12  1 50 43 58  5 57  1 54 39 56]'"

"目标：[idx2char]:'at, what? let's part' [idx]:'[39 58  6  1 61 46 39 58 12  1 50 43 58  5 57  1 54 39 56 58]'"

"从输出的概率分布取top：[idx2char]:'ccxzHqqzx?HuTFH-H;;;' [idx]:'[41 41 62 64 20 55 55 64 62 12 20 59 32 18 20  7 20 11 11 11]'"

'直接输出：[idx2char]:'

array([[ 0.00510419, -0.0116956 , -0.01077805, ...,  0.00211378,
         0.00469348,  0.00368046],
       [-0.01090839, -0.02199575, -0.00651144, ...,  0.0099673 ,
         0.00253165,  0.01090109],
       [ 0.00170103, -0.00854962, -0.01383094, ...,  0.01873878,
         0.01389195,  0.00728091],
       ...,
       [-0.00642838,  0.00728031,  0.0063029 , ..., -0.00143693,
         0.01254796,  0.00130002],
       [-0.01836203, -0.0148465 ,  0.00415995, ...,  0.00432193,
         0.01151806,  0.00901371],
       [-0.00422909,  0.00253295, -0.00814815, ...,  0.01425106,
         0.0067365 ,  0.00757992]], dtype=float32)

'损失：[CE-loss]:'

array([4.160237 , 4.176042 , 4.1832085, 4.1777472, 4.176107 , 4.165276 ,
       4.155126 , 4.177448 , 4.1557145, 4.1786823, 4.17679  , 4.1778092,
       4.1722336, 4.180721 , 4.175385 , 4.159927 , 4.1801143, 4.1773624,
       4.1755776, 4.1700773], dtype=float32)

>>> 下面是详细
输入: (4, 20)


array([['h', 'a', 't', ',', ' ', 'w', 'h', 'a', 't', '?', ' ', 'l', 'e',
        't', "'", 's', ' ', 'p', 'a', 'r'],
       ['t', 'e', 'r', 'p', 'r', 'e', 't', 'a', 't', 'i', 'o', 'n', ' ',
        'o', 'f', ' ', 't', 'h', 'e', ' '],
       ['d', ';', '\n', 'Y', 'o', 'u', 'r', ' ', 'm', 'o', 's', 't', ' ',
        'g', 'r', 'a', 'v', 'e', ' ', 'b'],
       [' ', 'm', 'y', ' ', 'f', 'r', 'i', 'e', 'n', 'd', 's', ';', ' ',
        'a', 'n', 'd', ',', ' ', 't', 'o']], dtype='<U1')

标注: (4, 20)


array([['a', 't', ',', ' ', 'w', 'h', 'a', 't', '?', ' ', 'l', 'e', 't',
        "'", 's', ' ', 'p', 'a', 'r', 't'],
       ['e', 'r', 'p', 'r', 'e', 't', 'a', 't', 'i', 'o', 'n', ' ', 'o',
        'f', ' ', 't', 'h', 'e', ' ', 't'],
       [';', '\n', 'Y', 'o', 'u', 'r', ' ', 'm', 'o', 's', 't', ' ', 'g',
        'r', 'a', 'v', 'e', ' ', 'b', 'e'],
       ['m', 'y', ' ', 'f', 'r', 'i', 'e', 'n', 'd', 's', ';', ' ', 'a',
        'n', 'd', ',', ' ', 't', 'o', ' ']], dtype='<U1')

模型预测: (4, 20, 65) --pick--> (4, 20)


array([['c', 'c', 'x', 'z', 'H', 'q', 'q', 'z', 'x', '?', 'H', 'u', 'T',
        'F', 'H', '-', 'H', ';', ';', ';'],
       ['F', 'T', 'b', ';', ';', ';', 'F', 'w', '-', 'J', '-', 'N', ' ',
        ' ', 'N', 'y', 'y', 'y', 'J', 'b'],
       ['d', 'F', 'n', 'C', ' ', 'S', 'N', ' ', 'a', 'W', 'L', 'W', 'Z',
        ' ', 'b', 'L', 'L', 'O', 's', 'H'],
       ['H', 'a', 'a', 'a', 'r', 'N', 'J', 'J', 'P', 'J', ' ', 'x', ' ',
        'L', 'c', 'd', 'Q', 'Z', 'Z', ' ']], dtype='<U1')

CE损失: [shape]:(4, 20)
 [[4.160237  4.176042  4.1832085 4.1777472 4.176107  4.165276  4.155126
  4.177448  4.1557145 4.1786823 4.17679   4.1778092 4.1722336 4.180721
  4.175385  4.159927  4.1801143 4.1773624 4.1755776 4.1700773]
 [4.178322  4.168017  4.181786  4.173589  4.1760755 4.17192   4.1770372
  4.174807  4.173535  4.1943154 4.1818757 4.1687164 4.1773987 4.1741657
  4.175613  4.1702967 4.1787305 4.187074  4.1721363 4.173623 ]
 [4.1857905 4.1755123 4.1723166 4.1690807 4.1669025 4.185561  4.160659
  4.1786213 4.1834593 4.1792254 4.172516  4.167448  4.1922054 4.175457
  4.1713305 4.183974  4.177972  4.1710653 4.161363  4.187027 ]
 [4.1765637 4.1705246 4.1785254 4.1679688 4.1452475 4.1743045 4.175
  4.1660066 4.1787114 4.1779504 4.1827216 4.156034  4.169887  4.187935
  4.1734324 4.1669626 4.1684337 4.1737814 4.1864586 4.156705 ]]


# 问题

一个错误的示例 | 输入都用np转成arr

In [None]:
# Experiment: pass a Python list wrapping a NumPy array to model.predict.
print("【实际不会用这种输入方式】如果输入是list型的，取的是第0个不管后面的")
model.reset_states()
# Show the first five output values after squeezing axis 1.
np.squeeze(model.predict([np.array([1,2])]),1)[:,:5]
# Reset the state, then predict [1] and [2] as two separate calls.
model.reset_states()
np.squeeze(model.predict([np.array([1])]),1)[:,:5]
np.squeeze(model.predict([np.array([2])]),1)[:,:5]


<font style="color:red"> 
为什么？？

预测`arr([[1,2]])`再预测`arr([[3,4]])` 
- 等价于直接预测 `arr([[1,2],[3,4]])` ✅
- 不等价于直接预测 `arr([[1,2,3,4]])` ❎
</font>

In [118]:
# Case 1: two consecutive predict calls without resetting the state in between.
print("先预测arr([[1,2]]) 在预测arr([[3,4]]) ")
model.reset_states()
p1 = model.predict(np.array([[1,2]]))
p2 = model.predict(np.array([[3,4]]))
print(f">>> arr([[1,2]])的shape:{np.array([[1,2]]).shape} --输出->{p1.shape}")
# Drop the batch axis and show the first five output values per position.
np.squeeze(p1,0)[:,:5]
print(f">>> arr([[3,4]])的shape:{np.array([[3,4]]).shape} --输出->{p2.shape}")
np.squeeze(p2,0)[:,:5]

# Case 2: a single predict call on an input with two rows.
print("直接预测arr([[1,2],[3,4]])")
model.reset_states()
p1_2 = model.predict(np.array([[1,2],[3,4]]))
print(f">>> arr([[1,2],[,34]])的shape:{np.array([[1,2],[3,4]]).shape} --输出->{p1_2.shape}")
p1_2[:,:,:5]

# Case 3: a single predict call on one row holding all four values.
print("直接预测arr([[1,2,3,4]])")
model.reset_states()
p12=model.predict(np.array([[1,2,3,4]]))
print(f">>> arr([[1,2,3,4]])的shape:{np.array([[1,2,3,4]]).shape} --输出->{p12.shape}")
np.squeeze(p12,0)[:,:5]

先预测arr([[1,2]]) 在预测arr([[3,4]]) 
>>> arr([[1,2]])的shape:(1, 2) --输出->(1, 2, 65)


array([[ -8.832616 , -15.162733 ,  -7.911195 ,  -2.1716287,  -2.1839044],
       [ 11.909098 ,  10.299256 ,  -8.214893 ,  -8.358577 , -10.036395 ]],
      dtype=float32)

>>> arr([[3,4]])的shape:(1, 2) --输出->(1, 2, 65)


array([[ 0.35068864,  0.43206474, -4.32421   , -3.1326041 , -2.0293384 ],
       [-0.7903478 , -1.5058678 , -7.7205534 , -8.223289  , -2.4436786 ]],
      dtype=float32)

直接预测arr([[1,2],[3,4]])
>>> arr([[1,2],[,34]])的shape:(2, 2) --输出->(2, 2, 65)


array([[[ -8.832616  , -15.162733  ,  -7.911195  ,  -2.1716287 ,
          -2.1839044 ],
        [ 11.909098  ,  10.299256  ,  -8.214893  ,  -8.358577  ,
         -10.036395  ]],

       [[  0.35068864,   0.43206474,  -4.32421   ,  -3.1326041 ,
          -2.0293384 ],
        [ -0.7903478 ,  -1.5058678 ,  -7.7205534 ,  -8.223289  ,
          -2.4436786 ]]], dtype=float32)

直接预测arr([[1,2,3,4]])
>>> arr([[1,2,3,4]])的shape:(1, 4) --输出->(1, 4, 65)


array([[ -8.832616  , -15.162731  ,  -7.9111934 ,  -2.171629  ,
         -2.1839037 ],
       [ 11.909098  ,  10.299255  ,  -8.214894  ,  -8.358576  ,
        -10.036394  ],
       [  1.7228051 ,  -1.361204  ,  -4.9199667 ,  -9.902154  ,
         -2.429722  ],
       [ -1.3204055 ,  -0.08819595,  -0.79439396,  -6.6956706 ,
          0.5630963 ]], dtype=float32)