In [1]:
import os
from pickle import dump,load

import numpy as np
from tensorflow.keras.layers import Input,LSTM,Dense
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.utils import pad_sequences,to_categorical

In [2]:
# Nursery rhyme used as the complete training corpus for the character model.
# Adjacent string literals are concatenated by Python into one string.
raw_text = (
    "Sing a song of sixpence, A pocket full of rye. "
    "Four and twenty blackbirds, Baked in a pie. "
    "When the pie was opened The birds began to sing; "
    "Wasn't that a dainty dish, To set before the king. "
    "The king was in his counting house, Counting out his money; "
    "The queen was in the parlour, Eating bread and honey. "
    "The maid was in the garden, Hanging out the clothes, "
    "When down came a blackbird And pecked off her nose."
)
raw_text

"Sing a song of sixpence, A pocket full of rye. Four and twenty blackbirds, Baked in a pie. When the pie was opened The birds began to sing; Wasn't that a dainty dish, To set before the king. The king was in his counting house, Counting out his money; The queen was in the parlour, Eating bread and honey. The maid was in the garden, Hanging out the clothes, When down came a blackbird And pecked off her nose."

In [3]:
def load_doc(filename):
    """Read a text file and return its entire contents as one string.

    Args:
        filename: Path of the file to read (opened in text mode).

    Returns:
        The full contents of the file.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename,'r') as file:
        return file.read()
def save_doc(lines,filename):
    """Write ``lines`` to ``filename``, separated by newlines.

    The items are joined with a single newline between them (no trailing
    newline is added). An existing file is overwritten. Missing parent
    directories are created first, so the call also works when the target
    folder does not exist yet.

    Args:
        lines: Iterable of strings to write.
        filename: Destination path (opened in text mode).
    """
    data = '\n'.join(lines)
    parent = os.path.dirname(filename)
    if parent:
        # A bare file name has no parent component; nothing to create then.
        os.makedirs(parent,exist_ok=True)
    # The context manager flushes and closes the handle even if write() raises.
    with open(filename,'w') as file:
        file.write(data)

In [4]:
# Collapse every run of whitespace in the rhyme into a single space.
tokens = raw_text.split()
raw_text = ' '.join(tokens)

# Slide a window over the text: `length` input characters plus one target
# character, so every sequence is length + 1 characters long.
length = 10
sequences = [raw_text[end-length:end+1] for end in range(length,len(raw_text))]

# Write the sequences only when the file is absent; an existing file is left
# untouched, even if the text or `length` above has changed since it was saved.
out_filename = 'char/saved.txt'
if not os.path.exists(out_filename):
    save_doc(sequences,out_filename)

In [5]:
# Read the saved sequences back from disk. Note that raw_text is rebound here:
# from this point on it holds the newline-joined file contents, not the rhyme.
filename = out_filename
raw_text = load_doc(filename)
# One sequence per line of the file.
lines = raw_text.split('\n')
# Preview the first twenty sequences.
lines[:20]

['Sing a song',
 'ing a song ',
 'ng a song o',
 'g a song of',
 ' a song of ',
 'a song of s',
 ' song of si',
 'song of six',
 'ong of sixp',
 'ng of sixpe',
 'g of sixpen',
 ' of sixpenc',
 'of sixpence',
 'f sixpence,',
 ' sixpence, ',
 'sixpence, A',
 'ixpence, A ',
 'xpence, A p',
 'pence, A po',
 'ence, A poc']

In [6]:
# Vocabulary: every distinct character of the loaded file, in sorted order.
# raw_text is the newline-joined sequence file at this point, so '\n' is part
# of the vocabulary as well.
chars = sorted(set(raw_text))
# Character -> integer index lookup used to encode the sequences.
mapping = {char: index for index, char in enumerate(chars)}
mapping

{'\n': 0,
 ' ': 1,
 "'": 2,
 ',': 3,
 '.': 4,
 ';': 5,
 'A': 6,
 'B': 7,
 'C': 8,
 'E': 9,
 'F': 10,
 'H': 11,
 'S': 12,
 'T': 13,
 'W': 14,
 'a': 15,
 'b': 16,
 'c': 17,
 'd': 18,
 'e': 19,
 'f': 20,
 'g': 21,
 'h': 22,
 'i': 23,
 'k': 24,
 'l': 25,
 'm': 26,
 'n': 27,
 'o': 28,
 'p': 29,
 'q': 30,
 'r': 31,
 's': 32,
 't': 33,
 'u': 34,
 'w': 35,
 'x': 36,
 'y': 37}

In [7]:
# Replace every character of every line with its integer index from `mapping`.
sequences = [[mapping[symbol] for symbol in text_line] for text_line in lines]

In [8]:
# Number of distinct characters; this is the width of every one-hot vector.
vocab_size = len(mapping)

# Split each encoded row: all but the last index are the input, the last index
# is the character to predict.
sequences = np.array(sequences)
x = sequences[:,:-1]
y = sequences[:,-1]
print(x.shape)

# One-hot encode the inputs row by row and stack them into one array.
sequences = [to_categorical(row,num_classes=vocab_size) for row in x]
x = np.array(sequences)
print(x.shape)

# One-hot encode the targets.
y = np.array(to_categorical(y,num_classes=vocab_size))
print(y.shape)

(399, 10)
(399, 10, 38)
(399, 38)


In [9]:
# Character-level language model: one LSTM layer followed by two dense layers,
# ending in a softmax over the vocabulary.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape` argument on the LSTM, which Keras warns about in Sequential
# models.
model = Sequential([
    Input(shape=(x.shape[1],x.shape[2])),  # (timesteps, one-hot width)
    LSTM(75),
    Dense(100,activation='relu'),
    Dense(vocab_size,activation='softmax')
])
model.summary()

  super().__init__(**kwargs)


In [10]:
# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# verbose=2 prints one summary line per epoch.
model.fit(x, y, epochs=150, verbose=2)

Epoch 1/150
13/13 - 2s - 171ms/step - accuracy: 0.0727 - loss: 3.6293
Epoch 2/150
13/13 - 0s - 11ms/step - accuracy: 0.1905 - loss: 3.5573
Epoch 3/150
13/13 - 0s - 10ms/step - accuracy: 0.1905 - loss: 3.2923
Epoch 4/150
13/13 - 0s - 12ms/step - accuracy: 0.1905 - loss: 3.0927
Epoch 5/150
13/13 - 0s - 12ms/step - accuracy: 0.1905 - loss: 3.0180
Epoch 6/150
13/13 - 0s - 14ms/step - accuracy: 0.1905 - loss: 2.9931
Epoch 7/150
13/13 - 0s - 12ms/step - accuracy: 0.1905 - loss: 2.9831
Epoch 8/150
13/13 - 0s - 14ms/step - accuracy: 0.1905 - loss: 2.9591
Epoch 9/150
13/13 - 0s - 14ms/step - accuracy: 0.1905 - loss: 2.9425
Epoch 10/150
13/13 - 0s - 13ms/step - accuracy: 0.1905 - loss: 2.9196
Epoch 11/150
13/13 - 0s - 12ms/step - accuracy: 0.2306 - loss: 2.8802
Epoch 12/150
13/13 - 0s - 14ms/step - accuracy: 0.2180 - loss: 2.8486
Epoch 13/150
13/13 - 0s - 13ms/step - accuracy: 0.2331 - loss: 2.7911
Epoch 14/150
13/13 - 0s - 12ms/step - accuracy: 0.2481 - loss: 2.7338
Epoch 15/150
13/13 - 0s - 11

<keras.src.callbacks.history.History at 0x20fca3ce650>

In [11]:
# Persist the trained model and the character mapping. Each artifact is written
# only when its file is absent, so files from an earlier run are left in place.
model_path = 'char/model.keras'
mapping_path = 'char/mapping.pkl'
if not os.path.exists(model_path):
    model.save(model_path)
if not os.path.exists(mapping_path):
    # The context manager flushes the pickle and closes the handle; the
    # previous bare open() call was never closed explicitly.
    with open(mapping_path,'wb') as mapping_file:
        dump(mapping,mapping_file)

In [12]:
def generate_seq(model,mapping,seq_length,seed_text,n_chars):
    """Extend ``seed_text`` by ``n_chars`` characters predicted by ``model``.

    At every step the text generated so far is encoded with ``mapping``,
    padded or truncated to ``seq_length`` by ``pad_sequences`` (keeping the most
    recent characters), one-hot encoded and passed to ``model.predict``. The
    character with the highest predicted score is appended (greedy decoding).

    Args:
        model: Object exposing ``predict(batch, verbose=0)``.
        mapping: Dict from character to integer index.
        seq_length: Number of characters the model receives per prediction.
        seed_text: Initial text; every character must be a key of ``mapping``.
        n_chars: Number of prediction steps to run.

    Returns:
        ``seed_text`` followed by the predicted characters.

    Raises:
        KeyError: If ``seed_text`` contains a character missing from ``mapping``.
    """
    unknown = sorted(set(seed_text) - set(mapping))
    if unknown:
        # Same exception type as the plain dict lookup, with a clearer message.
        raise KeyError(f"seed_text contains characters missing from mapping: {unknown}")

    # Take the one-hot width from the mapping that was passed in instead of a
    # notebook-level global, so the function also works in a fresh kernel that
    # only loaded the saved model and mapping.
    num_classes = len(mapping)
    # Reverse lookup built once instead of scanning the mapping for every character.
    index_to_char = {index: char for char, index in mapping.items()}

    in_text = seed_text
    for _ in range(n_chars):
        encoded = [mapping[char] for char in in_text]
        # truncating='pre' drops the oldest characters when the text is too long.
        encoded = pad_sequences([encoded],maxlen=seq_length,truncating='pre')
        encoded = to_categorical(encoded,num_classes=num_classes)
        ypred = model.predict(encoded,verbose=0)
        best_index = int(np.argmax(ypred))
        # An index without a character contributes nothing, as before.
        in_text += index_to_char.get(best_index,'')
    return in_text

In [13]:
# Reload the persisted artifacts from disk.
model = load_model('char/model.keras')
# NOTE: unpickling can execute arbitrary code; only load mapping files that
# were produced by this notebook or come from a trusted source.
# The context manager closes the handle, which the bare open() call never did.
with open('char/mapping.pkl','rb') as mapping_file:
    mapping = load(mapping_file)

Seed taken from the start of the rhyme

In [14]:
# Seed with the first ten characters of the rhyme and generate twenty more.
print(generate_seq(model, mapping, seq_length=10, seed_text='Sing a son', n_chars=20))

Sing a song of sixpence, A poc


Seeds taken from the middle of the rhyme

In [15]:
# Seeds longer than the ten-character window; generate twenty characters each.
for seed_text in ('A pocket full of', ' The king was in'):
    print(generate_seq(model, mapping, seq_length=10, seed_text=seed_text, n_chars=20))

A pocket full of rye. Four and twent
 The king was in his counting house,


Test example: seed text that does not appear in the rhyme

In [16]:
# Seed with text that is not part of the rhyme.
print(generate_seq(model, mapping, seq_length=10, seed_text='welcome to ', n_chars=20))

welcome to helrlf ouenty he uoo
