<a href="https://colab.research.google.com/github/jjyjung/ai/blob/gh-pages/tf_keras_RNN_Nietzsche.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random, sys, io
import tensorflow as tf
from tensorflow.keras import layers, optimizers
from tensorflow.keras.utils import get_file
from tensorflow.keras.callbacks import LambdaCallback

In [2]:
#Downloads a file from a URL if it not already in the cache.
path = get_file('nietzsche.txt',
        origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()   # Convert text files to lower case
print('corpus length:', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
corpus length: 600893


In [3]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))
for ch in chars:
    print(ch, end=",")
print()

char_indices = dict((c,i) for i, c in enumerate(chars))
indices_char= dict((i,c) for i, c in enumerate(chars))

#cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step): #range(start,stop,step)
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('nb sequences:', len(sentences))

total chars: 57

, ,!,",',(,),,,-,.,0,1,2,3,4,5,6,7,8,9,:,;,=,?,[,],_,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,ä,æ,é,ë,
nb sequences: 200285


In [4]:
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
        #print(i,t,char)
    y[i, char_indices[next_chars[i]]]=1
    
# build the model: a single LSTM
print('Build model...')
model = tf.keras.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

optimizer = optimizers.RMSprop(lr=0.01)

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Vectorization...


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = np.zeros((len(sentences), len(chars)), dtype=np.bool)


Build model...


  super(RMSprop, self).__init__(name, **kwargs)


In [5]:
def sample(preds, temperature=1.0):
    #helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

def on_epoch_end(epoch, _):
    #Function invoked at end of each epoch. Prints generated text.
    print()
    print('----Generating text after Epoch: %d ' % epoch)
    
    start_index = random.randint(0, len(text) - maxlen - 1) #임의로 시작
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('---- diversity:', diversity)
        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        #텍스트의 임의의 부분을 잡아서 seed sentence로 시작
        print('----Generating with seed: "' + sentence + '""')
        sys.stdout.write(generated)
        
        for i in range(400):
            x_pred = np.zeros((1,maxlen,len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0,t,char_indices[char]] = 1      #seed sentence를 입력
    
            preds = model.predict(x_pred, verbose=0)[0] #후속 글자 예측
            ##후속 글자 확률이 나왔을 때, multinomial 분포로 랜덤하게 char 생성
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
    
            generated += next_char
            sentence = sentence[1:] + next_char
    
            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [6]:
#training중 epoch 직후에 호출할 수 있는 함수 정의 기능
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x,y,
         batch_size=128,
         epochs=50,
         callbacks=[print_callback])

Epoch 1/50
----Generating text after Epoch: 0 
---- diversity: 0.2
----Generating with seed: "e introduced to a second,
substantial wo""
e introduced to a second,
substantial would the sublessed to a subless of the spections of the species of the species of the species of the species of the species, the species of the species of the species of the species of the species of the species of the species of the species, the seared the species of the presented of the species of the presided to a species of the species of the species of the species of the secrection of the spec
---- diversity: 0.5
----Generating with seed: "e introduced to a second,
substantial wo""
e introduced to a second,
substantial world, as a more of the degreat of the vidure with it as self are himself and cirtater man approfice of secrection of the more allowers, and readed in the
provice, of at inconsers and a spart and conscians of the consequently, the
respecial in the consequence. with the consequence. the a spect

  preds = np.log(preds) / temperature


m, eosginabercait sequence, the subject,
ariupbile genius
of fac
Epoch 19/50
----Generating text after Epoch: 18 
---- diversity: 0.2
----Generating with seed: "a merely national affair.

246. what a t""
a merely national affair.

246. what a themself the science of the subject of the sense of the superstition of the superstition of the self-self-destruction of the sense of the sense of the surely to the subject, the sense of the succession of the subject of the soul of the subjection of the senses of the consider of the subjections of the senses of the considerable of the subject of the succession of the fact that the self-self-contrar
---- diversity: 0.5
----Generating with seed: "a merely national affair.

246. what a t""
a merely national affair.

246. what a thing in the sense of the suttering of the concraelly one may preveloms of the promistic profession of the suffering the simalially succession of process of the origin, it is always the forence with their own sensele the resis

<keras.callbacks.History at 0x7f74f51c0790>