In [1]:
import pandas
pandas.set_option('display.max_colwidth', 300)

train_lines = pandas.read_csv('data/sentences.csv', encoding='utf-8')
train_lines[100:105]


Unnamed: 0,line
100,"She is a selfish, hypocritical woman, and I have no opinion of her."
101,Let me hear from you very soon.
102,"In marrying your nephew, I should not consider myself as quitting that sphere."
103,"Bennet was restored to her usual querulous serenity; and, by the middle of June, Kitty was so much recovered as to be able to enter Meryton without tears; an event of such happy promise as to make Elizabeth hope that by the following Christmas she might be so tolerably reasonable as not to menti..."
104,I am no longer surprised at your knowing _only_ six accomplished women.


In [2]:
import Helpers
helper = Helpers.Exquisite_Corpse

In [3]:
import spacy
encoder = spacy.load('en')

train_lines['tokens'] = helper.text_to_tokens(train_lines['line'], encoder)
train_lines[['line','tokens']][100:105]

Unnamed: 0,line,tokens
100,"She is a selfish, hypocritical woman, and I have no opinion of her.","[she, is, a, selfish, ,, hypocritical, woman, ,, and, i, have, no, opinion, of, her, .]"
101,Let me hear from you very soon.,"[let, me, hear, from, you, very, soon, .]"
102,"In marrying your nephew, I should not consider myself as quitting that sphere.","[ , in, marrying, your, nephew, ,, i, should, not, consider, myself, as, quitting, that, sphere, .]"
103,"Bennet was restored to her usual querulous serenity; and, by the middle of June, Kitty was so much recovered as to be able to enter Meryton without tears; an event of such happy promise as to make Elizabeth hope that by the following Christmas she might be so tolerably reasonable as not to menti...","[bennet, was, restored, to, her, usual, querulous, serenity, ;, and, ,, by, the, middle, of, june, ,, kitty, was, so, much, recovered, as, to, be, able, to, enter, meryton, without, tears, ;, an, event, of, such, happy, promise, as, to, make, elizabeth, hope, that, by, the, following, christmas,..."
104,I am no longer surprised at your knowing _only_ six accomplished women.,"[ , i, am, no, longer, surprised, at, your, knowing, _, only, _, six, accomplished, women, .]"


In [4]:
import pickle
import os

lexicon = helper.make_lexicon(token_seqs=train_lines['tokens'], min_freq=2)

filename = 'data/sentences_lexicon.pkl'

if not os.path.exists(filename):
    open(filename, 'w+').close()

with open(filename, 'wb') as f:
    pickle.dump(lexicon, f)

lexicon sample (6160 total items):
[('knowledge', 392), ('green', 394), ('purpose', 2108), ('exposing', 396), ('deliberation', 397)]


In [5]:
print(type(lexicon))
print(len(lexicon))

<class 'dict'>
6160


In [6]:
lexicon_lookup = helper.get_lexicon_lookup(lexicon)

LEXICON LOOKUP SAMPLE:
[(500, 'surmise'), (501, 'perplexity'), (502, 'safie'), (503, 'elevates'), (504, 'chimney'), (505, 'leads'), (506, 'sides'), (507, 'date'), (508, 'birds'), (509, 'cure')]


In [8]:
train_lines['line_ids'] = helper.tokens_to_ids(all_tokens=train_lines['tokens'], lexicon=lexicon)
train_lines[['tokens','line_ids']][500:510]

Unnamed: 0,tokens,line_ids
500,"[the, same, lulling, sounds, acted, as, a, lullaby, to, my, too, keen, sensations, ;, when, i, placed, my, head, upon, my, pillow, ,, sleep, crept, over, me, ;, i, felt, it, as, it, came, and, blessed, the, giver, of, oblivion, .]","[1738, 3074, 1, 3369, 4201, 1712, 468, 1, 1893, 695, 1304, 2732, 2370, 2892, 4652, 4327, 3222, 695, 895, 4403, 695, 1, 1194, 4065, 4924, 559, 5229, 2892, 4327, 1628, 2897, 1712, 2897, 6076, 3034, 451, 1738, 1, 1026, 4071, 3860]"
501,"[i, am, afraid, you, will, not, be, able, to, make, it, out, ,, but, i, hardly, know, what, i, have, written, .]","[4327, 733, 4377, 1181, 3097, 1615, 15, 2795, 1893, 3265, 2897, 2269, 1194, 3066, 4327, 4212, 5376, 2986, 4327, 5082, 1462, 3860]"
502,"[it, is, of, no, consequence, .]","[2897, 1447, 1026, 1674, 4302, 3860]"
503,"[elizabeth, saw, even, this, last, resource, ,, her, excellent, dispositions, and, irreproachable, conduct, ,, about, to, fail, the, accused, ,, when, ,, although, violently, agitated, ,, she, desired, permission, to, address, the, court, .]","[1581, 4983, 3932, 5990, 4647, 1465, 1194, 1217, 4271, 4913, 3034, 1513, 2126, 1194, 3946, 1893, 132, 1738, 850, 1194, 4652, 1194, 2492, 2691, 1503, 1194, 3289, 3867, 3715, 1893, 139, 1738, 2006, 3860]"
504,"[ , but, you, --, how, are, you, ?, cried, elizabeth, .]","[708, 3066, 1181, 1613, 1860, 288, 1181, 982, 5857, 1581, 3860]"
505,"[on, both, sides, it, was, only, assertion, .]","[937, 4896, 506, 2897, 1920, 3486, 577, 3860]"
506,"[it, was, a, handsome, modern, building, ,, well, situated, on, rising, ground, .]","[2897, 1920, 468, 4149, 4605, 5590, 1194, 2102, 5606, 937, 3815, 987, 3860]"
507,"[six, years, had, passed, since, then, :, , _, i, _, was, a, wreck, ,, but, nought, had, changed, in, those, savage, and, enduring, scenes, .]","[749, 4667, 3086, 3510, 5687, 893, 5972, 421, 2082, 4327, 2082, 1920, 468, 1948, 1194, 3066, 3783, 3086, 265, 398, 4143, 612, 3034, 881, 3990, 3860]"
508,"[let, me, then, advise, you, ,, dear, sir, ,, to, console, yourself, as, much, as, possible, ,, to, throw, off, your, unworthy, child, from, your, affection, for, ever, ,, and, leave, her, to, reap, the, fruits, of, her, own, heinous, offense, .]","[3274, 5229, 893, 2214, 1181, 1194, 4484, 4881, 1194, 1893, 3810, 150, 1712, 685, 1712, 3539, 1194, 1893, 4499, 112, 1614, 5762, 3307, 4888, 1614, 1353, 1407, 5344, 1194, 3034, 682, 1217, 1893, 54, 1738, 5463, 1026, 1217, 97, 1, 5345, 3860]"
509,"[it, was, reasonable, ,, however, ,, to, hope, that, they, would, not, continue, long, .]","[2897, 1920, 3271, 1194, 3703, 1194, 1893, 551, 2929, 2529, 4444, 1615, 878, 536, 3860]"


In [9]:
from keras.preprocessing.sequence import pad_sequences

max_length = max( [len(ids) for ids in train_lines['line_ids']])

train_padded_ids = pad_sequences(train_lines['line_ids'], maxlen=max_length)
print(train_padded_ids)

print("SHAPE:", train_padded_ids.shape)

Using TensorFlow backend.


[[   0    0    0 ..., 1217 2140 3860]
 [   0    0    0 ..., 3086 3379 3860]
 [   0    0    0 ..., 5990 3767 3860]
 ..., 
 [   0    0    0 ..., 1738  567 3860]
 [   0    0    0 ..., 1200 5509 3860]
 [   0    0    0 ..., 1738 1846 3860]]
SHAPE: (8639, 223)


In [10]:
pandas.DataFrame( list(zip(["-"] + train_lines['tokens'].loc[0], 
                      train_lines['tokens'].loc[0])),
                 columns=['input word', 'output word'])

Unnamed: 0,input word,output word
0,-,
1,,she
2,she,is
3,is,so
4,so,fond
5,fond,of
6,of,
7,,forster
8,forster,","
9,",",said


In [11]:
print(pandas.DataFrame(list(zip(train_padded_ids[0,:-1], train_padded_ids[0, 1:])), columns=['input words','output words']))

     input words  output words
0              0             0
1              0             0
2              0             0
3              0             0
4              0             0
5              0             0
6              0             0
7              0             0
8              0             0
9              0             0
10             0             0
11             0             0
12             0             0
13             0             0
14             0             0
15             0             0
16             0             0
17             0             0
18             0             0
19             0             0
20             0             0
21             0             0
22             0             0
23             0             0
24             0             0
25             0             0
26             0             0
27             0             0
28             0             0
29             0             0
..           ...           ...
192     

In [12]:
from keras.models import Model
from keras.layers import Input, Dense, TimeDistributed
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import GRU

def create_model(seq_input_len, n_input_nodes, n_embedding_nodes, n_hidden_nodes, stateful=False, batch_size=None):

    input_layer = Input(batch_shape=(batch_size, seq_input_len), name='input_layer')
    
    embedding_layer = Embedding(input_dim=n_input_nodes,
                               output_dim=n_embedding_nodes,
                               mask_zero=True, name='embedding_layer')(input_layer)
    
    gru_layer1 = GRU(n_hidden_nodes,
                    return_sequences=True,
                    stateful=stateful,
                    name='hidden_layer1')(embedding_layer)
    
    gru_layer2 = GRU(n_hidden_nodes,
                    return_sequences=True,
                    stateful=stateful,
                    name='hidden_layer2')(gru_layer1)
    
    output_layer = TimeDistributed(Dense(n_input_nodes, activation="softmax"),
                                  name='output_layer')(gru_layer2)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
    
    return model
    

In [13]:
model = create_model(seq_input_len=train_padded_ids.shape[-1] - 1,
                    n_input_nodes = len(lexicon) + 1,
                    n_embedding_nodes = 300,
                    n_hidden_nodes = 500)

In [14]:
len(lexicon)

6160

In [15]:
model.fit(x=train_padded_ids[:, :-1], 
          y=train_padded_ids[:, 1:, None], 
          epochs=10,
          batch_size=20)

model.save_weights('corpse_weights10.h5')

Epoch 1/10
 100/8639 [..............................] - ETA: 1612s - loss: 8.7049

KeyboardInterrupt: 

In [16]:
len(lexicon)

6160

In [24]:
predictor_model = create_model(seq_input_len=1,
                              n_input_nodes=len(lexicon) + 1,
                              n_embedding_nodes=300,
                              n_hidden_nodes = 500,
                              stateful=True,
                              batch_size=1)

predictor_model.load_weights('corpse_weights5.h5')

ValueError: Dimension 0 in both shapes must be equal, but are 6161 and 3283 for 'Assign' (op: 'Assign') with input shapes: [6161,300], [3283,300].

In [22]:
test_line = "Beyond the fields of my own sorrow, I had to believe "

words = [ [word.lower_ for word in encoder(test_line)] ]
words

[['beyond',
  'the',
  'fields',
  'of',
  'my',
  'own',
  'sorrow',
  ',',
  'i',
  'had',
  'to',
  'believe']]

In [23]:
model_input = helper.tokens_to_ids(words, lexicon)
model_input

[[3517, 1738, 3254, 1026, 695, 97, 949, 1194, 4327, 3086, 1893, 5132]]

In [None]:
response = helper.generateResponse(model, lexicon_lookup, model_input)