In [60]:
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers import Dense, Activation
import numpy as np

In [2]:
import re
from nltk.tokenize import word_tokenize
INPUT_FILE = "ADVENTURES_OF_SHERLOCK_HOLMES.txt"
text = []
# extract the input as a stream of characters
print("Extracting text from input...")
fin = open(INPUT_FILE, 'r', encoding='utf-8')
lines = []
counter = 0
for line in fin:
    counter = counter + 1 
    if(counter <= 5000): 
        new_line = re.findall('\w+', line)
        for word in new_line:
            word = word_tokenize(word)
            text.append(word[0].lower())
fin.close()


Extracting text from input...


In [4]:
# creating lookup tables
# Here chars is the number of features in our character "vocabulary"
word = set([c for c in text])
nb_words = len(word)
print(nb_words)
word2index = dict((c, i) for i, c in enumerate(word))
index2word = dict((i, c) for i, c in enumerate(word))

5147


In [5]:
print("Creating input and label text...")
SEQLEN = 10
STEP = 1

input_words = []
label_words = []
for i in range(0, len(text) - SEQLEN, STEP):
    input_words.append(text[i:i + SEQLEN])
    label_words.append(text[i + SEQLEN])


Creating input and label text...


In [6]:
# vectorize the input and label chars
# Each row of the input is represented by seqlen characters, each 
# represented as a 1-hot encoding of size len(char). There are 
# len(input_chars) such rows, so shape(X) is (len(input_chars),
# seqlen, nb_chars).
# Each row of output is a single character, also represented as a
# dense encoding of size len(char). Hence shape(y) is (len(input_chars),
# nb_chars).
print("Vectorizing input and label text...")
X = np.zeros((len(input_words), SEQLEN))
y = np.zeros((len(input_words), nb_words), dtype=np.bool)
for i,ch in enumerate(input_words):
    for j, cm in enumerate(ch):
        X[i, j] = word2index[cm]
    y[i, word2index[label_words[i]]] = 1

Vectorizing input and label text...


Simple RNN

In [7]:
# Build the model. We use a single RNN with a fully connected layer
# to compute the most likely predicted output char
HIDDEN_SIZE = 128
BATCH_SIZE = 64
NUM_ITERATIONS = 25
NUM_EPOCHS_PER_ITERATION = 1
NUM_PREDS_PER_EPOCH = 100

RNNmodel = Sequential()
RNNmodel.add(Embedding(nb_words, 64, input_length=SEQLEN))
RNNmodel.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
RNNmodel.add(Dense(nb_words))
RNNmodel.add(Activation("softmax"))

RNNmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

In [8]:
for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    RNNmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
    
    # testing model
    # randomly choose a row from input_chars, then use it to 
    # generate text from model for next 100  
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = RNNmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

Iteration #: 0
Epoch 1/1
Generating from seed: 
by his whole appearance he carried a broad brimmed hat 

by his whole appearance he carried a broad brimmed hat and the own man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man 
Iteration #: 1
Epoch 1/1
Generating from seed: 
and said that it would be safer and better not 

and said that it would be safer and better not have be the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of 

to frighten a chap for he sank his face onto his father was in his father was in his own and he was a little of the which was of a in which i have the very of the but i had not been of some and or my father was in his father and what do you know that it is not be well to the your be in a very is not so that i had not see my own said holmes you have been in a very of which i had been to his own and in the other were of the which was no so in 
Iteration #: 12
Epoch 1/1
Generating from seed: 
does but he is too tender hearted to hurt a 

does but he is too tender hearted to hurt a little but i am a little for one of the which was not to the with a of the of which i have been in his face and i have not see him that i had not been in him and of the man and as he had a from the that of the of which he had been to the man and the of which was to be a of the man and the of the and s that he was a man s a and and i am that his father was a and for it is a 
Iteration #: 13
Epoch 1/1
Generating f

tossed it it was an indian cigar of the variety which are it in my father i have a very by one of the of some but you would have been to be to an one of his the to the which there was no one of him and i was a man who was the in which he and that it was was that by the was in the which was in the of a but so i was that there was no i was in the which you but but i have been my own man is a you are and i have the at the but i 
Iteration #: 24
Epoch 1/1
Generating from seed: 
good for some years and an extra couple of hundred 

good for some years and an extra couple of hundred that it was very very would be to by the of this of the that i was the that of the and then i had been in upon his it was a little of the and then that there was a to be of a a she and at his have you have been it is that for he has been been in his case and he was not to him and i am a little of a man and i was in my father and i and with a it s is not have been but two is of 


In [12]:
INPUT_FILE = "A_STUDY_IN_SCARLET.txt"
text2 = []

print("Extracting text from input...")
book2 = open(INPUT_FILE, 'r', encoding='utf8')
counter = 0
for line in book2:
    counter = counter + 1 
    if(counter <= 5000): 
        new_line = re.findall('\w+', line)
        for word in new_line:
            word = word_tokenize(word)
            text2.append(word[0].lower())
book2.close()

Extracting text from input...


In [55]:
#Итак, у нас имеются 2 списка: text и text2
#Добавим одинаковые слова во вторую книгу

new_text = []
for t in text2:
    if t in text:
        new_text.append(t)
        
text2 = new_text
#print(text2)

In [15]:
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])


In [50]:
print(len(text2))
print(SEQLEN)
text2_new = text2[:500]
test_words1 = []
test_labels1 = []
for i in range(0, len(text2_new) - SEQLEN, STEP):
    test_words1.append(text2_new[i:i + SEQLEN])
    #print(test_words1)
    test_labels1.append(text2_new[i + SEQLEN])
print(test_words1)

39888
10
[['a', 'study', 'in', 'scarlet', 'by', 'a', '1', 's', 'note', 'this'], ['study', 'in', 'scarlet', 'by', 'a', '1', 's', 'note', 'this', 'is'], ['in', 'scarlet', 'by', 'a', '1', 's', 'note', 'this', 'is', 'from'], ['scarlet', 'by', 'a', '1', 's', 'note', 'this', 'is', 'from', 'an'], ['by', 'a', '1', 's', 'note', 'this', 'is', 'from', 'an', 'and'], ['a', '1', 's', 'note', 'this', 'is', 'from', 'an', 'and', 'care'], ['1', 's', 'note', 'this', 'is', 'from', 'an', 'and', 'care', 'has'], ['s', 'note', 'this', 'is', 'from', 'an', 'and', 'care', 'has', 'been'], ['note', 'this', 'is', 'from', 'an', 'and', 'care', 'has', 'been', 'taken'], ['this', 'is', 'from', 'an', 'and', 'care', 'has', 'been', 'taken', 'to'], ['is', 'from', 'an', 'and', 'care', 'has', 'been', 'taken', 'to', 'the'], ['from', 'an', 'and', 'care', 'has', 'been', 'taken', 'to', 'the', 'exactly'], ['an', 'and', 'care', 'has', 'been', 'taken', 'to', 'the', 'exactly', 'and'], ['and', 'care', 'has', 'been', 'taken', 'to', 'th

In [16]:
import math

In [17]:
def perplexity(probs):
    logsum = 0
    for prob in probs:
        log_prob = math.log2(prob)
        logsum += log_prob
    l = logsum/len(probs)
    perplex = math.pow(2, -1)
    return perplex

In [53]:
def eval_model(model):
    example = []
    for i in range(len(test_words)):
        probs = []
        test_input = test_words[i]
        #print(test_input)
        test_l = test_labels[i]
        test_s = np.zeros((1, SEQLEN))
        for k, word in enumerate(test_input):
            #print('k= ', k)
            #print('word= ', word)
            test_s[0, k] = word2index[word]
        pred = model.predict(test_s, verbose = 0)[0]
        prob = pred[word2index[test_l]]
        probs.append(prob)
    perplex = perplexity(probs)
    return perplex

In [22]:
evaluation = eval_model(RNNmodel)
print(evaluation)

0.5


In [34]:
import random
def generate_text(model):
    start_ind = np.random.randint(len(test_words))
    test_input = test_words[start_ind]
    
    for ind in range(150):
        test_s = np.zeros((1, SEQLEN))
        for w, word in enumerate(test_input):
            test_s[0, w] = word2index[word]
        pred = model.predict(test_s, verbose=0)[0]
        v = []
        for i in range(5):
            max_ind = np.argmax(pred)
            pred_w = index2word[max_ind]
            v.append(pred_w)
            pred[max_ind] = 0
        new_word = random.choice(v)
        print(new_word, end=' ')
        test_input = test_input[1:] + [new_word]
    print()

In [35]:
gen_text = generate_text(RNNmodel)

a he said holmes had the of of a one of he was in up that he could about him and his father is not a one which he and there s no he had the from the that it was the other other man to do not a are of the it but she was to think it of a little of my but we is in the more when we had in a in this some of us in it in a little of i with the but what are was mr for you not see no holmes i up that i could the it then not have come out for it s and in one that his will you had been upon the she s but we have a him and i did in that of it for you which you have had a a man for 


RNN Embedding 100

In [36]:
RNNmodel = Sequential()
RNNmodel.add(Embedding(nb_words, 100, input_length=SEQLEN))
RNNmodel.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
RNNmodel.add(Dense(nb_words))
RNNmodel.add(Activation("softmax"))

RNNmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

In [37]:
for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    RNNmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
    
    # testing model
    # randomly choose a row from input_chars, then use it to 
    # generate text from model for next 100  
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = RNNmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

Iteration #: 0
Epoch 1/1
Generating from seed: 
thought he might prove useful so i just ordered him 

thought he might prove useful so i just ordered him to the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of 
Iteration #: 1
Epoch 1/1
Generating from seed: 
by way of the woods to the boscombe pool it 

by way of the woods to the boscombe pool it is a little man of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door of the door o

in braving it with impunity or in which any of a and that is is the that is the which had been been by an as i have the by the of the of the the is the very of the man as i was a to be a and in the that i had the little of an for the holmes the was of the of the of the of the the of the no of the s the of one of the the of the man of the are the man was the man and a man was of his face and his he was a little of 
Iteration #: 12
Epoch 1/1
Generating from seed: 
of bodies lying in strange fantastic poses bowed shoulders bent 

of bodies lying in strange fantastic poses bowed shoulders bent and all the have had been out of the in my of the i have the the of a of the i have been a little of some very little and i have been in the not have the him but i was a to the man of the man with a very he was a to him and he was a to be in the which was a to me but the is of a of my father s it is the a man who had been a very man for a very of a very man and a 
Iteration #: 13
Epoch 1/1
Generating

in the sailing ship i think that it is quite as to that we have been upon a very well that i had been some and some or and we could not have it is to and that i have you have a man with man from the i have been a little of some on you but i am not a little of said holmes i have not have a very man as he as it the me and the one of the not of some the s have been in the i have not have the as but as he could be in the that i was a 
Iteration #: 24
Epoch 1/1
Generating from seed: 
both you and the coroner have been at some pains 

both you and the coroner have been at some pains said he to his be the up to the of the not and a very so i was that i was not one of the man who is in i in my own her and so i am very it is no one of some very but we may not be in his face and i there is a little not a man for a very man in his was but his father was a little of upon which you have had been as i could have the some one of my father i was the there is not a 


In [56]:
#Прописываю еще раз в связи с тем, что на момент обучения использовались эти же переменные
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])

In [57]:
evaluation = eval_model(RNNmodel)
print(evaluation)

0.5


In [58]:
gen_text = generate_text(RNNmodel)

on i had his as some one or holmes could come to in it with you to the i it is there but one and said holmes what you have a is a very to do but and you do you can see that this was i should you see what in you are to i was there you upon the this case mr could you have my been with me to be his with you who were to be his me or his i not his his father but i could there he had as i have been when a as when we have had him with a been that upon me then to his she would do that was with them my own man and that in his face was a little and when his up or as he in it was a s when you will be 


RNN embedding 200

In [None]:
RNNmodel = Sequential()
RNNmodel.add(Embedding(nb_words, 100, input_length=SEQLEN))
RNNmodel.add(SimpleRNN(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
RNNmodel.add(Dense(nb_words))
RNNmodel.add(Activation("softmax"))

RNNmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    RNNmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
    
    # testing model
    # randomly choose a row from input_chars, then use it to 
    # generate text from model for next 100  
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = RNNmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

In [None]:
#Прописываю еще раз в связи с тем, что на момент обучения использовались эти же переменные
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])

In [None]:
evaluation = eval_model(RNNmodel)
print(evaluation)

In [None]:
gen_text = generate_text(RNNmodel)

Lstm model

In [61]:
LSTMmodel = Sequential()
LSTMmodel.add(Embedding(nb_words, 64, input_length=SEQLEN))
LSTMmodel.add(LSTM(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
LSTMmodel.add(Dense(nb_words))
LSTMmodel.add(Activation("softmax"))

LSTMmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

In [62]:
for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    LSTMmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
 
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = LSTMmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

Iteration #: 0
Epoch 1/1
Generating from seed: 
man s story was absolutely true then what hellish thing 

man s story was absolutely true then what hellish thing the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the 
Iteration #: 1
Epoch 1/1
Generating from seed: 
the river to the east of london bridge between a 

the river to the east of london bridge between a man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man of the man

forced to admit that the facts are to the best of the same same same man and the whole same man is of his own man s own man and i have not been very coroner i have been been to the same and i have been heard of the same same same same same whole whole whole little and i shall be to do i shall be to be very own of my own own man and i have not heard my own little little little man and i have been to be whole own and i am not to be than i am not know that i have 
Iteration #: 12
Epoch 1/1
Generating from seed: 
being a traveller in wines they got 4700 for the 

being a traveller in wines they got 4700 for the whole case and i had been whole majesty and i am not know that i have not been been before i am not know that i have been been whole majesty i have been seen of my own own man and i have been heard of the whole case of the same whole whole whole man is to be so i have been to be whole own and i am not know that i have not been been before i am not know that i have been been whole ma

Generating from seed: 
violet ink she had written in a hurry and dipped 

violet ink she had written in a hurry and dipped her street when i am not so be i have been to be so for my is not that the one was had not been in would have been not been not in his not be his father s i have been little for my father s not so i am a man and i have a very of do so i have not an been of my the been of his have been not his his his had been not so his i have been his little little have been not be i have a am not be not 
Iteration #: 24
Epoch 1/1
Generating from seed: 
a crumpled envelope and turning to the table he shook 

a crumpled envelope and turning to the table he shook a very upon which he had not a very man and a man was a he did not him at him and he would have been to come to do so i have not an i have been by the think of you and i have been two of my father had not think i am a little you have no little little for me and i have no very man of his but if he was the of his have been be

In [63]:
#Прописываю еще раз в связи с тем, что на момент обучения использовались эти же переменные
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])

In [64]:
evaluation = eval_model(LSTMmodel)
print(evaluation) 

0.5


In [65]:
gen_text = generate_text(LSTMmodel)

it was in a very more than be that were not have been not to be very for a very of his be i have the two or a shall have one of this be and then to do so he would not be more to do in you see that it would see of an the am and then are it to in my a out upon that all the he must be be so as to have as we were in that there is holmes think of some see you to am a am of mr holmes for the had i think you that i shall not have not some have been a could so do to do in an was a am to one should in the time for it not have his very man for the his own that i was was to the may been 


LSTM embedding 100

In [None]:
LSTMmodel = Sequential()
LSTMmodel.add(Embedding(nb_words, 100, input_length=SEQLEN))
LSTMmodel.add(LSTM(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
LSTMmodel.add(Dense(nb_words))
LSTMmodel.add(Activation("softmax"))

LSTMmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    LSTMmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
 
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = LSTMmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

In [None]:
#Прописываю еще раз в связи с тем, что на момент обучения использовались эти же переменные
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])

In [None]:
evaluation = eval_model(LSTMmodel)
print(evaluation)

In [None]:
gen_text = generate_text(LSTMmodel)

LSTM embedding 200

In [None]:
LSTMmodel = Sequential()
LSTMmodel.add(Embedding(nb_words, 100, input_length=SEQLEN))
LSTMmodel.add(LSTM(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
LSTMmodel.add(Dense(nb_words))
LSTMmodel.add(Activation("softmax"))

LSTMmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    LSTMmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
 
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = LSTMmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

In [None]:
#Прописываю еще раз в связи с тем, что на момент обучения использовались эти же переменные
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])

In [None]:
evaluation = eval_model(LSTMmodel)
print(evaluation)

In [None]:
gen_text = generate_text(LSTMmodel)

GRU

In [66]:
GRUmodel = Sequential()
GRUmodel.add(Embedding(nb_words, 64, input_length=SEQLEN))
GRUmodel.add(LSTM(HIDDEN_SIZE, return_sequences=False,
                    input_shape=(SEQLEN, nb_words),
                    unroll=True))
GRUmodel.add(Dense(nb_words))
GRUmodel.add(Activation("softmax"))

GRUmodel.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['acc'])

for iteration in range(NUM_ITERATIONS):
    print("=" * 50)
    print("Iteration #: %d" % (iteration))
    GRUmodel.fit(X, y, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS_PER_ITERATION)
 
    test_idx = np.random.randint(len(input_words))
    test_words = input_words[test_idx]
    print("Generating from seed: ")
    for i in test_words:
        print(i, end=" ")
    print("\n")
    for i in test_words:
        print(i, end=" ")
    for i in range(NUM_PREDS_PER_EPOCH):
        Xtest = np.zeros((1, SEQLEN))
        for i, ch in enumerate(test_words[-10:len(test_words)]):
            Xtest[0, i] = word2index[ch]
        pred = GRUmodel.predict(Xtest, verbose=0)[0]
        ypred = index2word[np.argmax(pred)]
        print(ypred, end=" ")
        # move forward with test_chars + ypred
        test_words.append(ypred)
    print()

Iteration #: 0
Epoch 1/1
Generating from seed: 
of theories to suit facts but the note itself what 

of theories to suit facts but the note itself what the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the of of the 
Iteration #: 1
Epoch 1/1
Generating from seed: 
on the sundial i read peeping over his shoulder what s not to be of a you and i have a little of a but i am not to be a very i have the little of the but i have it is a very man who is a a man and i have the one of my own and i have been a man and that the has been out the very of the he was a and of the and which i was in the of which i have had been in a and of my father was the of that a holmes and the the is of 

on the sundial i read peeping over his shoul

open drawers as if the lady had hurriedly ransacked them and we had been few small than for the whole man was a man and he was not a little man and i am not a little man and i have been been in the same same same am be and few be few than holmes i have been been in some house and i am not know that i was not a little man and i have been know that i had been been in some own man and i have been know that i had been been in some own man and i have been know that i 
Iteration #: 11
Epoch 1/1
Generating from seed: 
a word became what you would call over here a 

a word became what you would call over here a man said holmes he was a few man and he was not a little man and he was not a few man and he was not a little man and he was not a few man and he was not a little man and he was not a few man and he was not a little man and he was not a few man and he was not a little man and he was not a few man and he was not a little man and he was not a few man and he was not a little man and 
Itera

then i shall go in it but i must owe you that i have not to have been know what is not do you of my father s i have been at the time that i had not to have been know what i was not to have been know what you will not be know that you have been very i have not know that holmes is my in the am so i am not and i have been a very do not know holmes that i should not be the matter of the matter have you know you will see that for the i shall be very so 
Iteration #: 22
Epoch 1/1
Generating from seed: 
of the criminal but how did you gain them you 

of the criminal but how did you gain them you have to be a very man and i have been been in some own man who had not have been very know what i was not that she is not very i have been to think of it but i have not to be very am that there is not one of the man who had a very little of have been own in one of his man was to the man who s a very man and that was all his face and a very little have been to me man and i was not to be very well 
Iter

In [67]:
#Прописываю еще раз в связи с тем, что на момент обучения использовались эти же переменные
test_words = []
test_labels = []
for i in range(0, len(text2) - SEQLEN, STEP):
    test_words.append(text2[i:i + SEQLEN])
    test_labels.append(text2[i + SEQLEN])

In [68]:
evaluation = eval_model(GRUmodel)
print(evaluation)

0.5


In [69]:
gen_text = generate_text(GRUmodel)

but for that it had not from one left an a time to my own but it was not that she will do it is a very man in a very but there some no man or his face for the very own i of him and he would be a more of it old man said when it had been one in his door were by his father i who s much at the other s in the house and that was not in that time she could come at all of my time to you will be the an matter which you may come in you at the matter be i shall be in that an case and found not but your was an had in one come to that and that i would have no am much very not not be holmes so not will have the 


Для каждой модели кроме последней прописаны еще различные embedding-и. 
В целях экономии времени, как бы не было ограничено число эпох (к сожалению, удалось прогнать для 3-х моделей только с единственной эпохой:(), построение моделей с разным числом эмбеддингов тоже пришлось урезать. 
Поэтому выводы будут представлены с начальными эмбеддингами (в этом случае - это 64).
Насчет количества эпох, текст получается более связный и "красивый", если мы будем использовать не одну, а более эпох. При этом показатель loss тоже снижается. Однако, на компьютере прогон одной модели занимает порядка 2,5 часов (скорее всего так долго еще из-за того, что тренирочный текст оказался достаточно большим). Но модель simpleRNN все же удалось прогнать с 3 эпохами и вот, что получилось: 

Iteration #: 24 
Epoch 1/3 
44378/44378 [==============================] - 95s 2ms/step - loss: 3.7130 
Epoch 2/3 
44378/44378 [==============================] - 95s 2ms/step - loss: 3.7065 
Epoch 3/3 
44378/44378 [==============================] - 96s 2ms/step - loss: 3.6996 
preparations have gone so far that we can risk the 

preparations have gone so far that we can risk the very way a down that your of all this have have your you said holmes as to have have the the little then would not have very the to is a or that he has not an now if it is i to not more with her there and i in the in the some nothing to a it was it we never little before by a what and in he then upon an your at was was in a he man in this holmes and when it for he was in the other he was you to what in

Довольно-таки неплохо, как мне кажется:)

Если же говорить о перплексии, то для простой рекурентной нейросети этот показать составил 0,5,
для LSTM - 0,5, 
для GRU - 0,5
Выходит, что с данными условиями все три модели работают примерно одинаково, различия , как можно видеть, при обучении в показаниях loss и accuracy на последних итерациях.