In [1]:
import re
import json
import numpy as np

In [2]:
from keras.models import load_model, Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout, Flatten

Using TensorFlow backend.


In [3]:
# Load the saved Keras model from its .h5 file (path is relative to the
# notebook's working directory). `implement` below calls model.predict on it.
model = load_model('model20180117_4.h5')

In [4]:
def load_dict(filename):
    """Parse the JSON file at ``filename`` and return the decoded object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file content.
    """
    with open(filename, 'r') as handle:
        contents = json.load(handle)
    return contents

def load_index_word_map(word2ind_filename='word2ind', ind2word_filename='ind2word'):
    """Load both vocabulary lookup tables from their JSON files.

    Args:
        word2ind_filename: Path of the JSON file read first.
        ind2word_filename: Path of the JSON file read second.

    Returns:
        A ``(word2ind, ind2word)`` tuple holding whatever ``load_dict``
        decodes from the two files, in that order.
    """
    return load_dict(word2ind_filename), load_dict(ind2word_filename)

In [5]:
# Load the two lookup tables from the default files 'word2ind' and 'ind2word'.
# They are used as globals by tokenise, detokenise and semantic_not_end.
word2ind, ind2word = load_index_word_map()

In [6]:
# Patterns used by input_cleaning AFTER every apostrophe has been padded to " ' ".
# An apostrophe followed by a known contraction fragment and a space
# (e.g. "' ll "), provided the next character is not another apostrophe.
apos_end_pattern = r"'( cause| d| em| ll| m| n| re| s| til| till| twas| ve) (?!')"
# A space, a known elision prefix, a space and then an apostrophe (e.g. " y '").
apos_start_pattern = r" (d |j |l |ol |y )'"
# The spaced-out form of the token 'n' (apostrophe on both sides).
apos_double_pattern = r" ' n ' "

In [7]:
def input_cleaning(sentence):
    """Normalise raw text and split it into word/punctuation pieces.

    The text is lower-cased, punctuation, digits and newlines are isolated
    with spaces, and apostrophes are first separated and then re-attached to
    the contraction fragments described by the module-level ``apos_*``
    patterns (e.g. ``"I'm"`` -> ``"i", "'m"`` and ``"can't"`` ->
    ``"ca", "n't"``).

    Args:
        sentence: Raw input text (str).

    Returns:
        list of str: the pieces obtained by splitting the cleaned text on
        single spaces. Leading/trailing whitespace (including newlines) is
        stripped first; an empty or all-whitespace input yields ``['']``.
    """
    sentence = sentence.lower()
    # Collapse the doubled apostrophe in "''cause" before "''" is turned into
    # a double quote two steps below.
    sentence = sentence.replace("''cause", "'cause")
    # Split the negation off its verb: "can't" -> "ca n't".
    sentence = sentence.replace("n't", " n't")
    # Two apostrophes in a row are treated as one double quote.
    sentence = sentence.replace("''", '"')
    # Surround newlines, digits and punctuation with spaces so each becomes its
    # own piece (digits are padded one by one). Inside the character class,
    # ",-." is a range that covers exactly ',', '-' and '.'.
    sentence = re.sub(r'[\n!"\(\),-.0-9:?\[\]]', lambda x: ' '+x.group(0)+' ', sentence)
    # Dropped-g endings: "doin'" -> "doing" (at end of text or before whitespace).
    sentence = re.sub(r"\w+in'$|\w+in'\s", lambda m: m.group(0).replace("'", 'g'), sentence)
    # Isolate every remaining apostrophe; the steps below re-attach the ones
    # that belong to a known fragment.
    sentence = sentence.replace("'", " ' ")
    # Every recovery step below requires a space on the outer side of the
    # fragment. Pad both ends so that a contraction at the very start or end
    # of the text (e.g. "I can't") is recovered too; the padding is removed
    # again by the strip() at the end.
    sentence = ' ' + sentence + ' '
    # recover 'cause, 'd, 'em, 'll, 'm, 'n, 're, 's, 'til, 'till, 'twas, 've
    # (drops the space that directly follows the apostrophe)
    sentence = re.sub(apos_end_pattern, lambda m: m.group(0)[:1]+m.group(0)[2:], sentence)
    # recover d', j', l', ol', y'
    # (drops the space that directly precedes the apostrophe)
    sentence = re.sub(apos_start_pattern, lambda m: m.group(0)[:-2]+m.group(0)[-1:], sentence)
    # recover 'n'  (" ' n ' " -> " 'n' ")
    sentence = re.sub(apos_double_pattern, lambda m: m.group(0)[:2]+m.group(0)[3]+m.group(0)[-2:], sentence)
    # Re-join the "n't" that the apostrophe padding above split apart.
    sentence = sentence.replace(" n ' t ", " n't ")
    # Collapse runs of spaces so the final split yields no empty pieces.
    sentence = re.sub(r' {2,}', ' ', sentence)
    sentence = sentence.strip()
    sentence = sentence.split(' ')
    return sentence

In [8]:
def tokenise(words):
    """Map each word to its index in the global ``word2ind`` table.

    Words that are not in the table are mapped to 0.
    """
    indices = []
    for word in words:
        indices.append(word2ind.get(word, 0))
    return indices

In [9]:
def detokenise(tokens):
    """Map each index back to its word via the global ``ind2word`` table.

    The table is looked up with the string form of each index; an index
    missing from the table raises ``KeyError``.
    """
    words = []
    for ind in tokens:
        words.append(ind2word[str(ind)])
    return words

In [10]:
# Patterns used by formatting to glue contraction fragments back onto words.
# A fragment with a leading apostrophe, with a space on both sides (e.g. " 'll ").
# 'cause is not listed here, so it stays a separate word.
unapos_end_pattern = r" '(d|em|ll|m|n|re|s|til|till|twas|ve) "
# An elision prefix with a trailing apostrophe, with a space on both sides (e.g. " y' ").
unapos_start_pattern = r" (d|j|l|ol|y)' "
# The token 'n' with a space on both sides.
unapos_double_pattern = r" 'n' "

In [11]:
def formatting(sentence):
    """Turn a space-joined string of tokens back into readable text.

    Removes the spaces around brackets, punctuation and double quotes,
    capitalises sentence starts, line starts and the pronoun "i", and
    re-attaches contraction fragments using the module-level ``unapos_*``
    patterns.

    Args:
        sentence: Tokens joined with single spaces (str).

    Returns:
        str: the formatted text with leading/trailing whitespace stripped.
    """
    # "( word" -> "(word": no space after an opening bracket.
    sentence = re.sub(r'[\(\[] ', lambda x: x.group(0)[:-1], sentence)
    # "word )" -> "word)": no space before a closing bracket or punctuation.
    sentence = re.sub(r' [\)\].!?,:]', lambda x: x.group(0)[1:], sentence)
    # Pull double quotes tight against the quoted text. The match is greedy
    # within a line: it runs from the first '" ' to the last ' "'.
    sentence = re.sub(r'" .+ "', lambda x: x.group(0).replace('" ', '"').replace(' "', '"'), sentence)
    # Capitalise the first word of the text and the word that follows ., ! or ?.
    sentence = re.sub(r'^[\[\("]?\w|[?.!]"? \w', lambda x: x.group(0).upper(), sentence)
    # Every pattern below needs a space after the fragment it rewrites. The
    # text can end on any token, so add one trailing space to give the last
    # token ("i", "n't", "'m", "'") the same treatment; strip() removes it.
    sentence = sentence + ' '
    sentence = re.sub(r' i ', ' I ', sentence)
    # " y' all" -> " y'all": drop the space after the apostrophe.
    sentence = re.sub(unapos_start_pattern, lambda x: x.group(0)[:-1], sentence)
    # "I 'm " -> "I'm ": drop the space before the apostrophe.
    sentence = re.sub(unapos_end_pattern, lambda x: x.group(0)[1:], sentence)
    # " 'n' " -> "'n'": drop the space on both sides.
    sentence = re.sub(unapos_double_pattern, lambda x: x.group(0)[1:-1], sentence)
    # "ca n't " -> "can't "
    sentence = sentence.replace(" n't ", "n't ")
    # doin ' => doin'
    sentence = re.sub(r"(\win) (' )", lambda m: m.group(1)+m.group(2), sentence)
    sentence = re.sub(r' {2,}', ' ', sentence)
    # first letter alignment: remove the space after a newline and capitalise
    # the letter that starts the new line
    sentence = re.sub(r'(\n) (\w)', lambda m: m.group(1)+m.group(2).upper(), sentence)
    sentence = sentence.strip()
    return sentence

In [12]:
def semantic_not_end(tokens):
    """Return True while ``tokens`` does not look like finished text.

    The sequence counts as unfinished when square brackets or round brackets
    are unbalanced, when the number of double quotes is odd, or when the last
    token is not one of ``.``, ``!``, ``?``, ``"`` or a newline.

    Args:
        tokens: Non-empty list of vocabulary indices.

    Returns:
        bool: True if generation should continue, False otherwise.
    """
    def occurrences(symbol):
        # How often the index of `symbol` appears in the sequence.
        return tokens.count(word2ind[symbol])

    square_mismatch = occurrences('[') != occurrences(']')
    round_mismatch = occurrences('(') != occurrences(')')
    open_quote = occurrences('"') % 2 == 1
    last_word = ind2word[str(tokens[-1])]
    dangling = last_word not in ('.', '!', '?', '"', '\n')
    return square_mismatch or round_mismatch or open_quote or dangling

In [13]:
def implement(seed_text, n_words, maxlen=10, must_stop=200):
    """Generate text by repeatedly sampling the next word from ``model``.

    Args:
        seed_text: Text that starts the generation; it is cleaned with
            ``input_cleaning`` and converted to indices with ``tokenise``.
        n_words: Number of words to generate before ``semantic_not_end`` is
            consulted to look for a natural stopping point.
        maxlen: Length of the token window passed to the model on every step
            (forwarded to ``pad_sequences``).
        must_stop: Hard upper bound on the number of generated words; it
            applies even while ``n_words`` has not been reached.

    Returns:
        str: the seed followed by the generated words, run through
        ``formatting``.
    """
    cleaned = input_cleaning(seed_text)
    window_tokens = tokenise(cleaned)
    res_tokens = list(window_tokens)
    # NOTE(review): after the first step `window_tokens` holds only the padded
    # model window plus the newest token, so the bracket/quote balance in
    # semantic_not_end is judged on that window rather than on the whole
    # generated text (res_tokens) -- confirm this is intended.
    while (n_words > 0 or semantic_not_end(window_tokens)) and must_stop > 0:
        model_input = pad_sequences([window_tokens], maxlen=maxlen)
        probs = model.predict(model_input)[0]
        # Model outputs are typically float32 and may not sum to exactly 1,
        # which np.random.choice can reject; renormalise in float64 first.
        probs = np.asarray(probs, dtype=np.float64)
        probs = probs / probs.sum()
        # An int first argument samples from range(len(probs)) directly.
        # Convert to a plain int so both token lists hold one integer type.
        predicted = int(np.random.choice(len(probs), p=probs))
        window_tokens = model_input[0].tolist()
        window_tokens.append(predicted)
        res_tokens.append(predicted)
        n_words -= 1
        must_stop -= 1
    detokenised = detokenise(res_tokens)
    return formatting(' '.join(detokenised))

In [14]:
# Generate text from a seed line; replace seed_text with your own.
# n_words=500 is above implement's default must_stop=200, so this call
# generates at most 200 words.
res = implement(seed_text="I can't sleep as she said she liked me", n_words=500, maxlen=20)

In [15]:
# Show the generated text. Sampling is random and no seed is set, so each run
# of the generation cell produces different output.
print(res)

I can't sleep as she said she liked me alive to you 
I love you and I've been asking so long for me baby this love just 
 
I will be dreaming of how I'm alright 
When I go under the street 
Afraid of the sun and stones 
Oh the lord eve in the park 
 
All 
For love is time 
Your sadness is gonna go 
And I'm a scandal of you 
Here we're hanging in the farm 
 
Take me tomorrow 
And say that it's near 
Baby help it when I fall back breaking you under that side 
In your own chances again 
There he'll mend the dark 
But it's only harder 
And all there brings 
Time in sea I don't feel impossible 
 
You are on my sleeve, I gotta make myself good 
Because I want some one that's a bite of 
Play, lame we gotta celebrate 
Was you comin' home 
Hiding for a cottage top around the sun fallin' outside 
There was as high as long as far


In [36]:
# NOTE(review): this cell's execution count (36) is out of sequence; the output
# below presumably comes from a separate run of the generation cell.
print(res)

I can't sleep as she said she liked me 
I'm in bed and tell you that wasn't giving me a call 
What him can't get it for yourself ' - dig 
 
The birds and marian are the closing 
In my mind and then run 
And break the spirit in the land 
 (repeat chorus) 
They will see the think of the crown in the sword 
The hell leaves their hearts no we wait... 
Ain't nothin' I'm not on my side 
So to die (good night) 
You got someone else 
That man can't find me to mouth? 
I'm leaving the girl that I'm feeling around 
 
I've found the world of love and I can't come nowhere to make what I do 
Now sound right home to me 
I've seen a way that you're there 
 'cause my name gets I got 
 
I knew that the key didn't end in our saddle 
On the jeep 
It's going right lazy 
And even love you in miami


In [16]:
# NOTE(review): `res` is assigned only once in this notebook, yet the output below
# differs from the earlier print cells -- presumably a separate generation run.
print(res)

I can't sleep as she said she liked me 
Goodbye and she said 
 
What for us now 
It used to wait so falling 
Once 
I want to stop up 
There's the truth someday 
You have to buy a sense 
Break me up 's, far round my heart 
Laugh on the roses on the carousel 
You check apart, we came for me 
You're so long and harm 
 
Learn to play the captain 
I remember something I grew you 
Stars's haunting blame 
Straight I've been nothing when you can say 
Someone made all you'll play 
 
The darkness that eternal of the pain 
Jesus, tell a deeper appear! 
 (i see, cold) 
And when the finest lights is only flowing above 
I last like a cadillacs people were games 
1 0 0 - the boy blues at the wings 
T - la - da - s - hm - huh -, " grade maids 
 
Heard, da - la - l, got ya hotter 
I, tv sweet!
