In [36]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras 
import tensorflow as tf

### 處理資料

In [2]:
# 下載所有莎士比亞著作
shakespeare_url = 'http://homl.info/shakespeare'
filepath = keras.utils.get_file('shakespeare.txt', shakespeare_url)  # Downloads a file from a URL
with open(filepath) as f:
    shakespeare_text = f.read()

In [3]:
# 將字元編成整數
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True) # 文字而非單字(編碼)，若不想轉為小寫(預設) lower=False
tokenizer.fit_on_texts([shakespeare_text])

In [11]:
max_id = len(tokenizer.word_index) # 不同字元的數量

In [5]:
dataset_size = len(shakespeare_text)

In [6]:
# 因為編碼從 1 開始
[encoded] = np.array(tokenizer.texts_to_sequences([shakespeare_text])) - 1

In [7]:
# split training and validation datasets (9:1)
train_size = dataset_size * 90 // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])  # 一個一個回傳每個字元

In [8]:
# by truncated backpropogation through time
n_steps = 100
window_length = n_steps + 1 # 輸入往前移動一個字元
dataset = dataset.window(window_length, shift=1, drop_remainder=True) # drop_remainder 類似 Conv2d 中的 padding
dataset = dataset.flat_map(lambda window : window.batch(window_length)) # 大小為 window_length 的 Tensor 資料組

In [9]:
batch_size = 32
dataset = dataset.shuffle(10000).batch(batch_size)
dataset = dataset.map(lambda windows : (windows[:,:-1], windows[:,1:]))  # 輸入 : 前 100 個字 ，目標 : 第 101 個字

In [26]:
# one-hot encodding
dataset = dataset.map(
    lambda X_batch, Y_batch : (tf.one_hot(X_batch, depth=max_id), Y_batch))

dataset = dataset.prefetch(1)

TypeError: in converted code:

    <ipython-input-25-81c2411066ab>:3 None  *
        lambda X_batch, Y_batch : (tf.one_hot(X_batch, depth=max_id), Y_batch)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/util/dispatch.py:180 wrapper
        return target(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/array_ops.py:3516 one_hot
        name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_array_ops.py:6137 one_hot
        off_value=off_value, axis=axis, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py:632 _apply_op_helper
        param_name=input_name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py:61 _SatisfiesTypeConstraint
        ", ".join(dtypes.as_dtype(x).name for x in allowed_list)))

    TypeError: Value passed to parameter 'indices' has DataType float64 not in list of allowed values: uint8, int32, int64


### 建立與訓練 char-RNN

In [11]:
Final = keras.models.Sequential([
    
    keras.layers.GRU(128, return_sequences=True, input_shape=[None,max_id],
                    dropout=0.2, recurrent_dropout=0.2),
    keras.layers.GRU(128, return_sequences=True,
                    dropout=0.2, recurrent_dropout=0.2),
    keras.layers.TimeDistributed(keras.layers.Dense(max_id,
                                                   activation='softmax'))
])

Final.summary()  # 突然不算偏置了

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 128)         64512     
_________________________________________________________________
gru_1 (GRU)                  (None, None, 128)         98688     
_________________________________________________________________
time_distributed (TimeDistri (None, None, 39)          5031      
Total params: 168,231
Trainable params: 168,231
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Final.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
# History = Final.fit(dataset, epochs=20, verbose=1)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on None steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### 儲存模型

In [221]:
# Final.save('Final.h5')

### 產生假的文章

In [61]:
def preprocess(texts):
    
    x=np.squeeze(tokenizer.texts_to_sequences(texts)) - 1

    z = x.reshape((1,len(texts)))
    
    y=tf.one_hot(z, max_id).eval(session=tf.compat.v1.Session())
    
    return y

In [8]:
# 取得下一個字元的預測
def next_char(text, temperature=1):  # temperature 為一種防止 overfitting 的想法(更高的 temperature，每個字元被選重的機率會更相同)
    x_new = preprocess(text)
    y_proba = model.predict(x_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.eval(session=tf.compat.v1.Session()))[0]

In [7]:
# 重複生成下一個字，組成文章
def artical(text, n_chars=50, temperature=1):
    for i in range(n_chars):
        text += next_char(text, temperature)
    return text

### 載入並使用模型

In [8]:
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras 
import tensorflow as tf

In [6]:
from tensorflow.keras.models import load_model

# 載入模型
model = load_model('Final.h5')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 128)         64512     
_________________________________________________________________
gru_1 (GRU)                  (None, None, 128)         98688     
_________________________________________________________________
time_distributed (TimeDistri (None, None, 39)          5031      
Total params: 168,231
Trainable params: 168,231
Non-trainable params: 0
_________________________________________________________________


In [None]:
# 下載所有莎士比亞著作
shakespeare_url = 'http://homl.info/shakespeare'
filepath = keras.utils.get_file('shakespeare.txt', shakespeare_url)  # Downloads a file from a URL
with open(filepath) as f:
    shakespeare_text = f.read()
    
# 將字元編成整數
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True) # 文字而非單字(編碼)，若不想轉為小寫(預設) lower=False
tokenizer.fit_on_texts([shakespeare_text])

max_id = len(tokenizer.word_index) # 不同字元的數量

def preprocess(texts):
    
    x=np.squeeze(tokenizer.texts_to_sequences(texts)) - 1

    z = x.reshape((1,len(texts)))
    
    y=tf.one_hot(z, max_id).eval(session=tf.compat.v1.Session())
    
    return y

# 取得下一個字元的預測
def next_char(text, temperature=1):
    x_new = preprocess(text)
    y_proba = model.predict(x_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.eval(session=tf.compat.v1.Session()))[0]

# 重複生成下一個字，組成文章
def artical(text, n_chars=50, temperature=1):
    for i in range(n_chars):
        text += next_char(text, temperature)
    return text

In [12]:
artical('hello', 100)

'hello reseech you you knows\nnot she is grumio.\n\nhortensio:\nso gentleman, and let me sound wiph bianca:\nbu'

In [13]:
artical('hello',150)

"hellows unto this duly as advise\nshall i will promise you, sirrah, then\ni have mad' me and all her, god their biont and more free\nfor the wofit, for ben st"

In [14]:
artical('hello',50)

'hello his dead; and fairly,\npromise.\n\nlucentio:\ntell me'