In [1]:
import pandas as pd
import numpy as np
from IPython.display import HTML

from __future__ import print_function
import keras
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, RNN, SimpleRNNCell, SimpleRNN
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

Using TensorFlow backend.


tf.estimator package not installed.
tf.estimator package not installed.


In [2]:
# Load the corpus: one name per line, no header row.
# `dtype=str` together with `keep_default_na=False` keeps entries such as "NA"
# or "null" as plain text; with pandas' default NA handling they are parsed as
# NaN (a float), which would make the later '\n'.join(...) over this column fail.
companies = pd.read_csv('names.txt', header=None, dtype=str,
                        keep_default_na=False)
companies.head()

Unnamed: 0,0
0,Aaberg
1,Aalst
2,Aara
3,Aaren
4,Aarika


In [4]:
# Build the training corpus: all names joined into one string, one per line.
names = companies[0].values
text = '\n'.join(names)

# Sorted character vocabulary, plus lookup tables in both directions
# (character -> index and index -> character).
chars = sorted(set(text))
print('total chars: {}'.format(len(chars)))
char_indices = dict(zip(chars, range(len(chars))))
indices_char = dict(enumerate(chars))

total chars: 56


In [5]:
# Window length (characters fed to the model) and stride between windows.
maxlen = 10
step = 3

# Slide a `maxlen`-wide window over the corpus in steps of `step`. Each window
# is one training input; the character immediately after it is the target.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('Number of sequences:', len(sentences))
print('First 10 sequences and next chars:')
# Slicing (instead of indexing 0..9) avoids an IndexError when the corpus
# yields fewer than ten windows; at most ten pairs are printed.
for seq, target in zip(sentences[:10], next_chars[:10]):
    print('[{}]:[{}]'.format(seq, target))

Number of sequences: 52450
First 10 sequences and next chars:
[Aaberg
Aal]:[s]
[erg
Aalst
]:[A]
[
Aalst
Aar]:[a]
[lst
Aara
A]:[a]
[
Aara
Aare]:[n]
[ra
Aaren
A]:[a]
[Aaren
Aari]:[k]
[en
Aarika
]:[A]
[Aarika
Aar]:[o]
[ika
Aaron
]:[A]


In [6]:
print('Vectorization...')
# One-hot encode the windows:
#   X[i, t, c] is set when character index c sits at position t of window i;
#   y[i, c]    is set for the character index c that follows window i.
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1
print('Size of X: {:.2f} MB'.format(X.nbytes/1024/1024))
print('Size of y: {:.2f} MB'.format(y.nbytes/1024/1024))

Vectorization...
Size of X: 28.00 MB
Size of y: 2.00 MB


In [7]:
# ### Initialization
#
# Build the recurrent model.  Keras contains three types of recurrent layers:
#
#  * `SimpleRNN`, a fully-connected RNN where the output is fed back to input.
#  * `LSTM`, the Long Short-Term Memory unit layer.
#  * `GRU`, the Gated Recurrent Unit layer.
#
# See https://keras.io/layers/recurrent/ for more information.

# Number of hidden units to use:
nb_units = 64

model = Sequential()

# The recurrent layer reads windows of shape (maxlen, len(chars)).
# SimpleRNN or GRU can be swapped in here; note that GRU is not among the
# layers imported at the top of this notebook and must be imported first.
model.add(LSTM(nb_units, input_shape=(maxlen, len(chars))))

# To stack multiple RNN layers, all RNN layers except the last one need
# to have "return_sequences=True".  An example of using two RNN layers:
#model.add(SimpleRNN(16,
#                    input_shape=(maxlen, len(chars)),
#                    return_sequences=True))
#model.add(SimpleRNN(32))

# Project onto the vocabulary and normalise into a probability distribution
# over the next character.
model.add(Dense(units=len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer)

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 64)                30976     
_________________________________________________________________
dense_1 (Dense)              (None, 56)                3640      
_________________________________________________________________
activation_1 (Activation)    (None, 56)                0         
Total params: 34,616
Trainable params: 34,616
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The log-probabilities are divided by ``temperature`` and re-normalised
    before drawing, so values below 1 concentrate the draw on the most likely
    entries and values above 1 flatten the distribution.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Divisor applied to the log-probabilities.

    Returns:
        The index (NumPy integer) of the entry that was drawn.
    """
    preds = np.asarray(preds).astype('float64')
    # Zero probabilities map to -inf, which is the intended result (they end
    # up with probability 0 again), so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0. The normalised result is mathematically
    # unchanged, but at small temperatures this keeps every exp() term from
    # underflowing to 0, which would otherwise yield 0/0 = NaN probabilities.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial gives a one-hot row; argmax recovers its index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [30]:
# Destination for the text produced by the callback below. The handle lives at
# module level because the training cell closes it once fit() has finished.
generated_file = open("generated.txt","w")


class SampleResult(keras.callbacks.Callback):
    """Keras callback that prints and records sampled text after every epoch.

    At the end of each epoch a random `maxlen`-character seed is cut from the
    corpus `text`. For each diversity (sampling temperature) in
    0.2, 0.5, 1.0 and 1.2 the current model generates 100 further characters
    from that same seed. The text is streamed to stdout and also written to
    the module-level `generated_file`.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Generate and record sample text; `epoch` and `logs` are unused."""
        # One seed per epoch, shared by all diversities so they are comparable.
        start_index = random.randint(0, len(text) - maxlen - 1)

        for diversity in [0.2, 0.5, 1.0, 1.2]:
            sentence = text[start_index: start_index + maxlen]
            generated = sentence
            print()
            print('----- Generating with diversity',
                  diversity, 'seed: "' + sentence + '"')
            sys.stdout.write(generated)

            for _ in range(100):
                # One-hot encode the current window as a batch of size 1.
                x = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x[0, t, char_indices[char]] = 1.

                preds = self.model.predict(x, verbose=0)[0]
                next_index = sample(preds, diversity)
                next_char = indices_char[next_index]

                generated += next_char
                # Slide the window: drop the oldest character, append the new.
                sentence = sentence[1:] + next_char

                sys.stdout.write(next_char)
                sys.stdout.flush()

            # Write each finished sample exactly once. Writing `generated`
            # inside the character loop re-emitted the whole accumulated
            # string on every step, duplicating text in the file. The newline
            # keeps consecutive samples from running together.
            generated_file.write(generated + '\n')
        # Make this epoch's samples visible on disk while training continues.
        generated_file.flush()
        print('\n\n')
sample_callback = SampleResult()

In [31]:
# Train for 10 epochs; `sample_callback` prints generated text after each one.
# try/finally guarantees that the output file used by the callback is closed
# (and its buffered contents written) even if training raises or is
# interrupted from the keyboard.
try:
    history = model.fit(X, y,
                        epochs=10,
                        batch_size=512,
                        verbose=2,
                        callbacks=[sample_callback])
finally:
    generated_file.close()

Epoch 1/10
 - 11s - loss: 0.5997

----- Generating with diversity 0.2 seed: "helle
Mech"
helle
Mechlee
Mechell
Meceley
Mecely
Meckin
Meciav
Mecie
Meceley
Mecela
Mecer
Mecela
Mecer
Mecele
Meckerty
Mec
----- Generating with diversity 0.5 seed: "helle
Mech"
helle
Mechlie
Meceline
Mecrelli
Mecrellia
Merlich
Merlick
Merlick
Merlick
Merlige
Merlia
Merlinda
Merlieda
Mer
----- Generating with diversity 1.0 seed: "helle
Mech"
helle
Mechlee
Mecera
Mecky
Meciar
Mecye
Mederi
Meelar
Meeon
Meffa
Mehalen
Mehgam
Miggi
Miggi
Migging
Milg
Mime
----- Generating with diversity 1.2 seed: "helle
Mech"
helle
Mechliah
Meckie
Meciena
Meciento
Mecieusza
Mecut
Ceed
Cuel
Rucans
Ruchy
Ruciberg
Ruckin
Ruccerfor
Ruckin


Epoch 2/10
 - 11s - loss: 0.5961

----- Generating with diversity 0.2 seed: "ta
Aretha
"
ta
Aretha
Aretis
Arei
Aseiti
Ashire
Ashiris
Ashrer
Ashren
Ashres
Ashrey
Ashre
Asharis
Asharssa
Ashussa
Ashuste
----- Generating with diversity 0.5 seed: "ta
Aretha
"
ta
Aretha
Arethia
Arethoine
Aretl
Arged
Arhes

'am\nCramer\n'