<a href="https://colab.research.google.com/github/dhyougit/NLP/blob/main/GenerativeTextModel_usingLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras

In [2]:
import keras
import numpy as np

path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Text corps dimension:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Text corps dimension: 600893


In [3]:
type(text)

str

In [5]:
# extract sentence with same length
maxlen = 60

# get sequence in every 3 letters
step = 3

sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('Number of Sequences:', len(sentences))

chars = sorted(list(set(text)))
print('Unique words:', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

# one-hot encoding
print('Vectorize ...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Number of Sequences: 200278
Unique words: 57
Vectorize ...


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = np.zeros((len(sentences), len(chars)), dtype=np.bool)


In [6]:
# Build network (LSTM + dense)

from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

In [7]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)



In [8]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
import random
import sys

random.seed(42)
start_index = random.randint(0, len(text) - maxlen - 1)

for epoch in range(1, 60):
    print('Epoch', epoch)
    # learn model
    model.fit(x, y, batch_size=128, epochs=1)

    # randomly select seeded text
    seed_text = text[start_index: start_index + maxlen]
    print('--- seed text: "' + seed_text + '"')

    # Change smapling temperature
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temprature:', temperature)
        generated_text = seed_text
        sys.stdout.write(generated_text)

        # generate sentence
        for i in range(400):
            # converted generated text by one-hot-encoding
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.

            # sampling
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]

            generated_text += next_char
            generated_text = generated_text[1:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

에포크 1
--- seed text: "the slowly ascending ranks and classes, in which,
through fo"
------ temprature: 0.2
the slowly ascending ranks and classes, in which,
through for the the the and in the the the thes in the wore the the the the the the the mereren of anderes and the where the the the the the thereres of the the the the the wheres of the ther and and and and there the the the the mores and the the the the the the the the somerent of rereren the ther the the the the the the and and and and and in the the sore the ther the the the the the the the the the the 
------ temprature: 0.5
the slowly ascending ranks and classes, in which,
through forer, lis and stither the what the ofserender the stores of rethe than touls and of the
stenges and the the deresting mand woresures the soof in thes of salle the dererin ther the rethe and tore the homentatity and fon the the the the cand and of thiches ape there toun fores ine sald perereso the ther and in then ther wher perally, tho mere here so