In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
import time

In [None]:
# Fetch the Shakespeare corpus and remember where the local copy lives.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The `with` block closes the file handle as soon as the read is done.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
print(f'Length of text: {len(text)} characters')
# Preview the first 250 characters of the corpus.
print(text[:250])

Length of text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
vocab = list(set(text))
vocab.sort()
print(f'{len(vocab)} unique characters')

65 unique characters


In [None]:
# Two-way lookup between characters and integer ids: a character's id is its
# position in `vocab`, and `id2chars[i]` gives back the character with id `i`.
chars2id = dict(zip(vocab, range(len(vocab))))
id2chars = np.array(vocab)

In [None]:
def tesx2int(text):
  """Encode a string as a NumPy array of character ids.

  Every character is looked up in the module-level ``chars2id`` mapping.

  Args:
    text: String (or other iterable of characters) to encode.

  Returns:
    1-D ``np.int64`` array holding one id per character, in order. An empty
    input yields an empty integer array.

  Raises:
    KeyError: If a character is not a key of ``chars2id``.
  """
  # Pin the dtype: without it an empty input becomes a float64 array, which
  # cannot be used to index `id2chars` when decoding.
  return np.array([chars2id[char] for char in text], dtype=np.int64)
def int2text(_int):
  """Decode character ids back into a string.

  Args:
    _int: Integer id, or array/list of integer ids, used to index the
      module-level ``id2chars`` array.

  Returns:
    The characters selected from ``id2chars``, concatenated into one ``str``.
  """
  selected_chars = id2chars[_int]
  return ''.join(selected_chars)

In [None]:
# Each chunk holds seq_length characters plus one extra, so that an input and
# a target shifted by one position can both be cut from the same chunk.
seq_length = 100

# Encode the whole corpus as ids and expose it as a stream of single ids.
alltext2int = tesx2int(text)
alltext2int_dataset = tf.data.Dataset.from_tensor_slices(alltext2int)

# Number of complete (seq_length + 1)-character chunks in the corpus.
per_epoch = len(text) // (seq_length + 1)

# Group the id stream into fixed-size chunks; a trailing partial chunk is dropped.
sequences = alltext2int_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

In [None]:
def split_input_next(sequence):
  """Split one chunk into an (input, target) pair offset by one position.

  Args:
    sequence: Any sliceable sequence of length n.

  Returns:
    Tuple ``(x, y)`` where ``x`` is the sequence without its last element and
    ``y`` is the sequence without its first element.
  """
  return sequence[:-1], sequence[1:]
# Turn every chunk into an (input, target) pair.
train_dataset = sequences.map(map_func=split_input_next)

In [None]:
BATCH_SIZE = 64
# Size of the shuffle buffer. `Dataset.shuffle` takes the buffer size as its
# first argument, so despite the legacy name `seed` below this value is not a
# random seed; no RNG seed is set for the shuffle.
BUFFER_SIZE = 10000
seed = BUFFER_SIZE  # legacy name, kept in case other cells still read it
dataset = (
    train_dataset
    .shuffle(buffer_size=BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)  # drop the final short batch
    .prefetch(tf.data.experimental.AUTOTUNE)
)
dataset

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [None]:
# Model hyperparameters.
vocab_size = len(vocab)  # number of distinct characters in the corpus
embedding_dim = 256  # passed to the Embedding layer as its output dimension
rnn_units = 1024  # passed to the GRU layer as its number of units
batch_size = BATCH_SIZE  # lowercase alias of BATCH_SIZE

class load_model(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  Despite its name, this class builds a new model from its constructor
  arguments; it does not load anything from disk.
  """

  def __init__(self,vocab_size, embedding_dim, rnn_units):
    """Create the three layers of the model.

    Args:
      vocab_size: Number of distinct character ids; used as the embedding
        input dimension and as the width of the final Dense layer.
      embedding_dim: Output dimension of the embedding layer.
      rnn_units: Number of units in the GRU layer.
    """
    # The base initializer takes no positional arguments here; `self` is
    # already bound by super().
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences: emit an output for every position, not just the last.
    # return_state: also return the final state so generation can carry it on.
    self.gru = tf.keras.layers.GRU(rnn_units, return_sequences=True, return_state=True)
    # No activation: the layer emits one raw score (logit) per character id.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self,x,state=None,return_state=False,training=False):
    """Run the model on a batch of character ids.

    Args:
      x: Batch of character ids.
      state: Optional initial GRU state. When ``None``, the GRU's own initial
        state is used.
      return_state: When true, also return the final GRU state.
      training: Forwarded to every layer.

    Returns:
      The logits, or ``(logits, state)`` when ``return_state`` is true.
    """
    x = self.embedding(x, training=training)
    if state is None:
      state = self.gru.get_initial_state(x)
    x, state = self.gru(x, initial_state=state, training=training)
    x = self.dense(x, training=training)
    if return_state:
      return x, state
    return x



In [None]:
# Build the model from the hyperparameters defined above.
model = load_model(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
)


In [None]:
# Sanity check: push a single batch through the model and inspect the output shape.
for example_batch in dataset.take(1):
  input_example_batch, target_example_batch = example_batch
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [None]:
# Print the layer-by-layer summary, including parameter counts.
model.summary()

Model: "load_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  16640     
_________________________________________________________________
gru (GRU)                    multiple                  3938304   
_________________________________________________________________
dense (Dense)                multiple                  66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [None]:
EPOCHS = 20

# The model's Dense head has no softmax, so its outputs are raw logits rather
# than probabilities in [0, 1]; from_logits=True tells the loss to expect that.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(loss=loss, optimizer='adam')

# Train and keep the History object returned by fit.
history = model.fit(dataset, epochs=EPOCHS)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
def generate_text(model,input_word,state=None,num=1000,temperature=1.0):
  """Sample text from the model one character at a time and print it.

  The whole prompt is fed to the model on the first step. After that, only the
  newly sampled character is fed back in, together with the state returned by
  the previous step.

  Args:
    model: Callable as ``model(ids, state=..., return_state=True)`` and
      returning ``(logits, state)``.
    input_word: Non-empty prompt string; it is encoded with ``tesx2int``.
    state: Optional initial state, passed through to the model unchanged.
    num: Number of characters to generate.
    temperature: Positive number the logits are divided by before sampling.
      Values below 1 sharpen the sampling distribution; values above 1
      flatten it.

  Returns:
    None. The prompt, a newline, and the generated characters are printed.

  Raises:
    ValueError: If ``input_word`` is empty or ``temperature`` is not positive.
  """
  if not input_word:
    # An empty prompt would leave no last position to read logits from.
    raise ValueError('input_word must contain at least one character')
  if temperature <= 0:
    raise ValueError('temperature must be positive')

  # Add a leading batch axis so the prompt forms a batch of one.
  input_id = tf.expand_dims(tesx2int(input_word), 0)
  result = []

  for _ in range(num):
    predit, state = model(input_id, state=state, return_state=True)
    # Only the logits at the last position predict the next character.
    predit = predit[:, -1, :] / temperature
    # Sample from the distribution instead of always taking the top choice.
    predit_id = tf.random.categorical(predit, num_samples=1)
    # The sampled id is the entire input of the next step.
    input_id = predit_id
    result.append(int2text(tf.squeeze(predit_id, axis=-1).numpy()))

  print(input_word + '\n' + ''.join(result))

In [None]:
# Generate and print a sample that continues the prompt "ROMEO: ".
generate_text(model=model, input_word='ROMEO: ')

ROMEO: 
have you done, so many hours; the
punusant of you all, dies doom the crown.

YORK:
Hing Liesing he is good to be obedoin.
What, like a mighty sea?

STANLEY:
I may not weer; for I'll to go to cry again.
Did you behold your daughter's heir,
To make commits them a lover's gentle Claudio.

JULIET:
O, no more than let here be furnish'd by thy hand;
And so, my most ofet them!

LUCIO:
For all at fretch, which was most quick at the night
That I not have a brother still.

GLOUCESTER:
What, no matter, of his own sovereign,
You do between your lordship; but out air
The dangerous triumphs what mine ears a little winter'd
Rupping of this feast, to kind earth
With one that have before wenches from this king.

GLOUCESTER:
The general make hour begin to cry From contemn'd
Shall be the Lady Bona traitors! free
pinch'd twenty heads and before me a lamentard word dispersed
The one in the heart of the fire.

SICINIUS:
He is a brave bed!

HASTINGS:
Killer? now lords, begins our parting seas.

QUEEN