## Neural Language Modeling

### Develop a Word-Based Neural Language Model

##### Model 1: One-Word-In, One-Word-Out Sequences

In [4]:
from numpy import array
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding

# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
    """Greedily generate up to ``n_words`` words following ``seed_text``.

    This is the one-word-in variant: only the most recent known word is fed
    to the model, and each predicted word becomes the input of the next step.

    Args:
        model: Object whose ``predict(inputs, verbose=0)`` returns one row of
            per-word scores for every input row.
        tokenizer: Object exposing ``texts_to_sequences`` and ``word_index``.
        seed_text: Text whose last known word starts the generation.
        n_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early when the current input contains no
        known word or the predicted index has no word in the vocabulary.
    """
    # invert word -> index once instead of scanning the vocabulary per word
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    in_text, result = seed_text, seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        if not encoded:
            # nothing the model can be fed (e.g. unknown seed word)
            break
        # The model looks at a single word, so only the last one is the
        # context; passing every word would be treated as a batch and the
        # flattened argmax below would no longer be a word index.
        encoded = array(encoded[-1:])
        # predict a word in the vocabulary
        yhat = model.predict(encoded, verbose=0)
        # index of the word with the highest probability
        predicted_word_index = int(np.argmax(yhat))
        # map predicted word index to word
        out_word = index_to_word.get(predicted_word_index)
        if out_word is None:
            # no word for this index: stop rather than append an empty word
            break
        # the predicted word is both appended and used as the next input
        in_text, result = out_word, result + ' ' + out_word
    return result


# define the model
def define_model(vocab_size):
    """Build, compile and summarize the one-word-in language model.

    The network is an ``Embedding`` (10 dimensions, input length 1), an
    ``LSTM`` with 50 units and a softmax ``Dense`` layer with ``vocab_size``
    outputs. It is compiled with categorical cross-entropy, the adam
    optimizer and the accuracy metric, and its summary is printed.

    Args:
        vocab_size: Number of rows of the embedding and of output classes.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        Embedding(vocab_size, 10, input_length=1),
        LSTM(50),
        Dense(vocab_size, activation='softmax'),
    ]
    network = Sequential(layers)
    # compile network
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # summarize defined model
    network.summary()
    # optional diagram: plot_model(network, to_file='model.png', show_shapes=True)
    return network


# source text
data = """ In the mustardseed sun,
By full tilt river and switchback sea
Where the cormorants scud,
In his house on stilts high among beaks
And palavers of birds
This sandgrain day in the bent bay's grave
He celebrates and spurns
His driftwood thirty-fifth wind turned age;
Herons spire and spear.
Under and round him go
Flounders, gulls, on their cold, dying trails,
Doing what they are told,
Curlews aloud in the congered waves
Work at their ways to death,
And the rhymer in the long tongued room,
Who tolls his birthday bell,
Toesl towards the ambush of his wounds;
Herons, stepple stemmed, bless.
In the thistledown fall,
He sings towards anguish; finches fly
In the claw tracks of hawks
On a seizing sky; small fishes glide
Through wynds and shells of drowned
Ship towns to pastures of otters. He
In his slant, racking house
And the hewn coils of his trade perceives
Herons walk in their shroud,
The livelong river's robe
Of minnows wreathing around their prayer;
And far at sea he knows,
Who slaves to his crouched, eternal end
Under a serpent cloud,
Dolphins dyive in their turnturtle dust,
The rippled seals streak down
To kill and their own tide daubing blood
Slides good in the sleek mouth.
In a cavernous, swung
Wave's silence, wept white angelus knells.
Thirty-five bells sing struck
On skull and scar where his lovews lie wrecked,
Steered by the falling stars.
And to-morrow weeps in a blind cage
Terror will rage apart
Before chains break to a hammer flame
And love unbolts the dark
And freely he goes lost
In the unknown, famous light of great
And fabulous, dear God.
Dark is a way and light is a place,
Heaven that never was
Nor will be ever is alwas true,
And, in that brambled void,
Plenty as blackberries in the woods
The dead grow for His joy.
There he might wander bare
With the spirits of the horseshoe bay
Or the stars' seashore dead,
Marrow of eagles, the roots of whales
And wishbones of wild geese,
With blessed, unborn God and His Ghost,
And every soul His priest,
Gulled and chanter in youg Heaven's fold
Be at cloud quaking peace,
But dark is a long way.
He, on the earth of the night, alone
With all the living, prays,
Who knows the rocketing wind will blow
The bones out of the hills,
And the scythed boulders bleed, and the last
Rage shattered waters kick
Masts and fishes to the still quick stars,
Faithlessly unto Him
Who is the light of old
And air shaped Heaven where souls grow wild
As horses in the foam:
Oh, let me midlife mourn by the shrined
And druid herons' vows
The voyage to ruin I must run,
Dawn ships clouted aground,
Yet, though I cry with tumbledown tongue,
Count my blessings aloud:
Four elements and five
Senses, and man a spirit in love
Thangling through this spun slime
To his nimbus bell cool kingdom come
And the lost, moonshine domes,
And the sea that hides his secret selves
Deep in its black, base bones,
Lulling of spheres in the seashell flesh,
And this last blessing most,
That the closer I move
To death, one man through his sundered hulks,
The louder the sun blooms
And the tusked, ramshackling sea exults;
And every wave of the way
And gale I tackle, the whole world then,
With more triumphant faith
That ever was since the world was said,
Spins its morning of praise,
I hear the bouncing hills
Grow larked and greener at berry brown
Fall and the dew larks sing
Taller this thuderclap spring, and how
More spanned with angles ride
The mansouled fiery islands! Oh,
Holier then their eyes,
And my shining men no more alone
As I sail out to die"""

# fit the tokenizer on the corpus and encode the whole text as word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]

# vocabulary size is the number of distinct words plus one
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

# pair every word with the word that follows it
sequences = [list(pair) for pair in zip(encoded[:-1], encoded[1:])]
print('Total Sequences: %d' % len(sequences))

# column 0 is the input word, column 1 the word to predict
sequences = array(sequences)
X = sequences[:, 0]
# one hot encode the targets
y = to_categorical(sequences[:, 1], num_classes=vocab_size)

# build and train the network
model = define_model(vocab_size)
model.fit(X, y, epochs=500, verbose=2)

# generate six words starting from a seed word
print(generate_seq(model, tokenizer, 'horses', 6))

Vocabulary Size: 376
Total Sequences: 635
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 1, 10)             3760      
                                                                 
 lstm_3 (LSTM)               (None, 50)                12200     
                                                                 
 dense_3 (Dense)             (None, 376)               19176     
                                                                 
Total params: 35136 (137.25 KB)
Trainable params: 35136 (137.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/500
20/20 - 1s - loss: 5.9283 - accuracy: 0.0472 - 1s/epoch - 58ms/step
Epoch 2/500
20/20 - 0s - loss: 5.9194 - accuracy: 0.0850 - 35ms/epoch - 2ms/step
Epoch 3/500
20/20 - 0s - loss: 5.9101 - accuracy: 0.0882 - 28ms/epoch - 1ms/s

#####  Model 2: Line-by-Line Sequence

In [5]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding


# generate a sequence from a language model
def generate_seq(model, tokenizer, max_length, seed_text, n_words):
    """Greedily extend ``seed_text`` by up to ``n_words`` predicted words.

    On every step the whole text generated so far is encoded, pre-padded or
    truncated to ``max_length`` by ``pad_sequences``, and the highest-scoring
    word is appended.

    Args:
        model: Object whose ``predict(inputs, verbose=0)`` returns per-word
            scores for the padded input.
        tokenizer: Object exposing ``texts_to_sequences`` and ``word_index``.
        max_length: Length the encoded text is padded/truncated to before it
            is passed to the model.
        seed_text: Text that starts the generation.
        n_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early if the predicted index has no word in
        the vocabulary.
    """
    # imported here because this cell's import list does not bring in ``np``
    import numpy as np

    # invert word -> index once instead of scanning the vocabulary per word
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # pre-pad sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=max_length, padding='pre')
        # predict probabilities for each word
        yhat = model.predict(encoded, verbose=0)
        # index of the word with the highest probability
        predicted_word_index = int(np.argmax(yhat))
        # map predicted word index to word
        out_word = index_to_word.get(predicted_word_index)
        if out_word is None:
            # no word for this index: stop rather than append an empty word
            break
        # append to input
        in_text += ' ' + out_word
    return in_text


# define the model
def define_model(vocab_size, max_length):
    """Build, compile and summarize the line-by-line language model.

    The network is an ``Embedding`` (10 dimensions, input length
    ``max_length - 1``), an ``LSTM`` with 50 units and a softmax ``Dense``
    layer with ``vocab_size`` outputs. It is compiled with categorical
    cross-entropy, the adam optimizer and the accuracy metric, and its
    summary is printed.

    Args:
        vocab_size: Number of rows of the embedding and of output classes.
        max_length: Length of a full sequence including its final target
            word; the model input is one element shorter.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        Embedding(vocab_size, 10, input_length=max_length-1),
        LSTM(50),
        Dense(vocab_size, activation='softmax'),
    ]
    network = Sequential(layers)
    # compile network
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # summarize defined model
    network.summary()
    # optional diagram: plot_model(network, to_file='model.png', show_shapes=True)
    return network

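# source text: every verse line ends with an explicit "\n" escape followed by a
# real line break, so data.split('\n') further down also yields empty strings;
# those encode to no tokens and therefore contribute no training sequences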
data = """In the mustardseed sun,\n
By full tilt river and switchback sea\n
Where the cormorants scud,\n
In his house on stilts high among beaks\n
And palavers of birds\n
This sandgrain day in the bent bay's grave\n
He celebrates and spurns\n
His driftwood thirty-fifth wind turned age;\n
Herons spire and spear.\n
Under and round him go\n
Flounders, gulls, on their cold, dying trails,\n
Doing what they are told,\n
Curlews aloud in the congered waves\n
Work at their ways to death,\n
And the rhymer in the long tongued room,\n
Who tolls his birthday bell,\n
Toesl towards the ambush of his wounds;\n
Herons, stepple stemmed, bless.\n
In the thistledown fall,\n
He sings towards anguish; finches fly\n
In the claw tracks of hawks\n
On a seizing sky; small fishes glide\n
Through wynds and shells of drowned\n
Ship towns to pastures of otters. He\n
In his slant, racking house\n
And the hewn coils of his trade perceives\n
Herons walk in their shroud,\n
The livelong river's robe\n
Of minnows wreathing around their prayer;\n
And far at sea he knows,\n
Who slaves to his crouched, eternal end\n
Under a serpent cloud,\n
Dolphins dyive in their turnturtle dust,\n
The rippled seals streak down\n
To kill and their own tide daubing blood\n
Slides good in the sleek mouth.\n
In a cavernous, swung\n
Wave's silence, wept white angelus knells.\n
Thirty-five bells sing struck\n
On skull and scar where his lovews lie wrecked,\n
Steered by the falling stars.\n
And to-morrow weeps in a blind cage\n
Terror will rage apart\n
Before chains break to a hammer flame\n
And love unbolts the dark\n
And freely he goes lost\n
In the unknown, famous light of great\n
And fabulous, dear God.\n
Dark is a way and light is a place,\n
Heaven that never was\n
Nor will be ever is alwas true,\n
And, in that brambled void,\n
Plenty as blackberries in the woods\n
The dead grow for His joy.\n
There he might wander bare\n
With the spirits of the horseshoe bay\n
Or the stars' seashore dead,\n
Marrow of eagles, the roots of whales\n
And wishbones of wild geese,\n
With blessed, unborn God and His Ghost,\n
And every soul His priest,\n
Gulled and chanter in youg Heaven's fold\n
Be at cloud quaking peace,\n
But dark is a long way.\n
He, on the earth of the night, alone\n
With all the living, prays,\n
Who knows the rocketing wind will blow\n
The bones out of the hills,\n
And the scythed boulders bleed, and the last\n
Rage shattered waters kick\n
Masts and fishes to the still quick stars,\n
Faithlessly unto Him\n
Who is the light of old\n
And air shaped Heaven where souls grow wild\n
As horses in the foam:\n
Oh, let me midlife mourn by the shrined\n
And druid herons' vows\n
The voyage to ruin I must run,\n
Dawn ships clouted aground,\n
Yet, though I cry with tumbledown tongue,\n
Count my blessings aloud:\n
Four elements and five\n
Senses, and man a spirit in love\n
Thangling through this spun slime\n
To his nimbus bell cool kingdom come\n
And the lost, moonshine domes,\n
And the sea that hides his secret selves\n
Deep in its black, base bones,\n
Lulling of spheres in the seashell flesh,\n
And this last blessing most,\n
That the closer I move\n
To death, one man through his sundered hulks,\n
The louder the sun blooms\n
And the tusked, ramshackling sea exults;\n
And every wave of the way\n
And gale I tackle, the whole world then,\n
With more triumphant faith\n
That ever was since the world was said,\n
Spins its morning of praise,\n
I hear the bouncing hills\n
Grow larked and greener at berry brown\n
Fall and the dew larks sing\n
Taller this thuderclap spring, and how\n
More spanned with angles ride\n
The mansouled fiery islands! Oh,\n
Holier then their eyes,\n
And my shining men no more alone\n
As I sail out to die\n """

# fit the tokenizer on the whole corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

# vocabulary size is the number of distinct words plus one
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

# for every line, collect each prefix that holds at least two words
sequences = []
for line in data.split('\n'):
    encoded = tokenizer.texts_to_sequences([line])[0]
    sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))
print('Total Sequences: %d' % len(sequences))

# left-pad every prefix to the length of the longest one
max_length = max(map(len, sequences))
sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')
print('Max Sequence Length: %d' % max_length)

# the last column is the word to predict, everything before it is the input
sequences = array(sequences)
X = sequences[:, :-1]
y = to_categorical(sequences[:, -1], num_classes=vocab_size)

# build and train the network
model = define_model(vocab_size, max_length)
model.fit(X, y, epochs=500, verbose=2)

# generate text from a couple of seed words
for seed_text, n_words in (('horses', 4), ('pastures', 4)):
    print(generate_seq(model, tokenizer, max_length-1, seed_text, n_words))


Vocabulary Size: 376
Total Sequences: 528
Max Sequence Length: 9
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 8, 10)             3760      
                                                                 
 lstm_4 (LSTM)               (None, 50)                12200     
                                                                 
 dense_4 (Dense)             (None, 376)               19176     
                                                                 
Total params: 35136 (137.25 KB)
Trainable params: 35136 (137.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/500
17/17 - 1s - loss: 5.9273 - accuracy: 0.0379 - 1s/epoch - 82ms/step
Epoch 2/500
17/17 - 0s - loss: 5.9023 - accuracy: 0.0777 - 43ms/epoch - 3ms/step
Epoch 3/500
17/17 - 0s - loss: 5.7443 - accuracy: 0.07

##### Model 3: Two-Words-In, One-Word-Out Sequence

In [6]:
from numpy import array
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.preprocessing.sequence import pad_sequences
from keras.utils import plot_model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding


# generate a sequence from a language model
def generate_seq(model, tokenizer, max_length, seed_text, n_words):
    """Greedily extend ``seed_text`` by up to ``n_words`` predicted words.

    On every step the whole text generated so far is encoded, pre-padded or
    truncated to ``max_length`` by ``pad_sequences``, and the highest-scoring
    word is appended.

    Args:
        model: Object whose ``predict(inputs, verbose=0)`` returns per-word
            scores for the padded input.
        tokenizer: Object exposing ``texts_to_sequences`` and ``word_index``.
        max_length: Length the encoded text is padded/truncated to before it
            is passed to the model.
        seed_text: Text that starts the generation.
        n_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early if the predicted index has no word in
        the vocabulary.
    """
    # imported here because this cell's import list does not bring in ``np``
    import numpy as np

    # invert word -> index once instead of scanning the vocabulary per word
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    in_text = seed_text
    # generate a fixed number of words
    for _ in range(n_words):
        # encode the text as integer
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        # pre-pad sequences to a fixed length
        encoded = pad_sequences([encoded], maxlen=max_length, padding='pre')
        # predict probabilities for each word
        yhat = model.predict(encoded, verbose=0)
        # index of the word with the highest probability
        predicted_word_index = int(np.argmax(yhat))
        # map predicted word index to word
        out_word = index_to_word.get(predicted_word_index)
        if out_word is None:
            # no word for this index: stop rather than append an empty word
            break
        # append to input
        in_text += ' ' + out_word
    return in_text


# define the model
def define_model(vocab_size, max_length):
    """Build, compile and summarize the two-words-in language model.

    The network is an ``Embedding`` (10 dimensions, input length
    ``max_length - 1``), an ``LSTM`` with 50 units and a softmax ``Dense``
    layer with ``vocab_size`` outputs. It is compiled with categorical
    cross-entropy, the adam optimizer and the accuracy metric, and its
    summary is printed.

    Args:
        vocab_size: Number of rows of the embedding and of output classes.
        max_length: Length of a full sequence including its final target
            word; the model input is one element shorter.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        Embedding(vocab_size, 10, input_length=max_length-1),
        LSTM(50),
        Dense(vocab_size, activation='softmax'),
    ]
    network = Sequential(layers)
    # compile network
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # summarize defined model
    network.summary()
    # optional diagram: plot_model(network, to_file='model.png', show_shapes=True)
    return network


# source text
data = """ In the mustardseed sun,\n
By full tilt river and switchback sea\n
Where the cormorants scud,\n
In his house on stilts high among beaks\n
And palavers of birds\n
This sandgrain day in the bent bay's grave\n
He celebrates and spurns\n
His driftwood thirty-fifth wind turned age;\n
Herons spire and spear.\n
Under and round him go\n
Flounders, gulls, on their cold, dying trails,\n
Doing what they are told,\n
Curlews aloud in the congered waves\n
Work at their ways to death,\n
And the rhymer in the long tongued room,\n
Who tolls his birthday bell,\n
Toesl towards the ambush of his wounds;\n
Herons, stepple stemmed, bless.\n
In the thistledown fall,\n
He sings towards anguish; finches fly\n
In the claw tracks of hawks\n
On a seizing sky; small fishes glide\n
Through wynds and shells of drowned\n
Ship towns to pastures of otters. He\n
In his slant, racking house\n
And the hewn coils of his trade perceives\n
Herons walk in their shroud,\n
The livelong river's robe\n
Of minnows wreathing around their prayer;\n
And far at sea he knows,\n
Who slaves to his crouched, eternal end\n
Under a serpent cloud,\n
Dolphins dyive in their turnturtle dust,\n
The rippled seals streak down\n
To kill and their own tide daubing blood\n
Slides good in the sleek mouth.\n
In a cavernous, swung\n
Wave's silence, wept white angelus knells.\n
Thirty-five bells sing struck\n
On skull and scar where his lovews lie wrecked,\n
Steered by the falling stars.\n
And to-morrow weeps in a blind cage\n
Terror will rage apart\n
Before chains break to a hammer flame\n
And love unbolts the dark\n
And freely he goes lost\n
In the unknown, famous light of great\n
And fabulous, dear God.\n
Dark is a way and light is a place,\n
Heaven that never was\n
Nor will be ever is alwas true,\n
And, in that brambled void,\n
Plenty as blackberries in the woods\n
The dead grow for His joy.\n
There he might wander bare\n
With the spirits of the horseshoe bay\n
Or the stars' seashore dead,\n
Marrow of eagles, the roots of whales\n
And wishbones of wild geese,\n
With blessed, unborn God and His Ghost,\n
And every soul His priest,\n
Gulled and chanter in youg Heaven's fold\n
Be at cloud quaking peace,\n
But dark is a long way.\n
He, on the earth of the night, alone\n
With all the living, prays,\n
Who knows the rocketing wind will blow\n
The bones out of the hills,\n
And the scythed boulders bleed, and the last\n
Rage shattered waters kick\n
Masts and fishes to the still quick stars,\n
Faithlessly unto Him\n
Who is the light of old\n
And air shaped Heaven where souls grow wild\n
As horses in the foam:\n
Oh, let me midlife mourn by the shrined\n
And druid herons' vows\n
The voyage to ruin I must run,\n
Dawn ships clouted aground,\n
Yet, though I cry with tumbledown tongue,\n
Count my blessings aloud:\n
Four elements and five\n
Senses, and man a spirit in love\n
Thangling through this spun slime\n
To his nimbus bell cool kingdom come\n
And the lost, moonshine domes,\n
And the sea that hides his secret selves\n
Deep in its black, base bones,\n
Lulling of spheres in the seashell flesh,\n
And this last blessing most,\n
That the closer I move\n
To death, one man through his sundered hulks,\n
The louder the sun blooms\n
And the tusked, ramshackling sea exults;\n
And every wave of the way\n
And gale I tackle, the whole world then,\n
With more triumphant faith\n
That ever was since the world was said,\n
Spins its morning of praise,\n
I hear the bouncing hills\n
Grow larked and greener at berry brown\n
Fall and the dew larks sing\n
Taller this thuderclap spring, and how\n
More spanned with angles ride\n
The mansouled fiery islands! Oh,\n
Holier then their eyes,\n
And my shining men no more alone\n
As I sail out to die\n """

# fit the tokenizer on the corpus and encode the whole text as word indices
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]

# vocabulary size is the number of distinct words plus one
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

# every window of three consecutive words: two inputs followed by one target
sequences = [encoded[start:start + 3] for start in range(len(encoded) - 2)]
print('Total Sequences: %d' % len(sequences))

# left-pad every window to the length of the longest one
max_length = max(map(len, sequences))
sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')
print('Max Sequence Length: %d' % max_length)

# the last column is the word to predict, everything before it is the input
sequences = array(sequences)
X = sequences[:, :-1]
y = to_categorical(sequences[:, -1], num_classes=vocab_size)

# build and train the network
model = define_model(vocab_size, max_length)
model.fit(X, y, epochs=500, verbose=2)

# generate text from several two-word seeds
for seed_text, n_words in (
    ('druid herons', 5),
    ('Who knows', 3),
    ('Grow larked', 5),
    ('larked and', 5),
):
    print(generate_seq(model, tokenizer, max_length-1, seed_text, n_words))

Vocabulary Size: 376
Total Sequences: 634
Max Sequence Length: 3
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_5 (Embedding)     (None, 2, 10)             3760      
                                                                 
 lstm_5 (LSTM)               (None, 50)                12200     
                                                                 
 dense_5 (Dense)             (None, 376)               19176     
                                                                 
Total params: 35136 (137.25 KB)
Trainable params: 35136 (137.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/500
20/20 - 1s - loss: 5.9278 - accuracy: 0.0552 - 1s/epoch - 59ms/step
Epoch 2/500
20/20 - 0s - loss: 5.9168 - accuracy: 0.0726 - 35ms/epoch - 2ms/step
Epoch 3/500
20/20 - 0s - loss: 5.9014 - accuracy: 0.07