# Biblical quotes generator
The following is an biblical text generator that was trained using the first 1000 quotes of America's Standard Bible.
To upload files uncomment line.

In [0]:
import tensorflow as tf
from google.colab import files
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import csv
import os
#uploaded = files.upload()

In [9]:
#open csv file
with open("t_asv.csv") as file:
  csvreader = csv.reader(file, delimiter=",")
  quotes = []
  for row in csvreader:
    quotes.append(row[4])
file.close()
#lower corpus
quotes = [quote.lower() for quote in quotes]
#print number of quotes
print(len(quotes))
#print an example of a quote
print(quotes[12])
#select training size of 1000
quotes = quotes[:1000]

31104
and the earth brought forth grass, herbs yielding seed after their kind, and trees bearing fruit, wherein is the seed thereof, after their kind: and god saw that it was good.


In [11]:
#create tokenizer
tokenizer = Tokenizer()
#create word_index and registe words on quotes
tokenizer.fit_on_texts(quotes)
#print total number of words
total_words = len(tokenizer.word_index) + 1
print(total_words)

#convert text to sequences
input_sequences = []
for line in quotes:
  token_list = tokenizer.texts_to_sequences([line])[0]
  for i in range(1, len(token_list)):
    n_gram_sequence = token_list[:i+1]
    input_sequences.append(n_gram_sequence)

#get maximum sequence length
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))
print(input_sequences.shape)

#prepare dataset with labels and inputs
predictors, label = input_sequences[:,:-1],input_sequences[:,-1]


#convert labels to categorical representation
label = tf.keras.utils.to_categorical(label, num_classes=total_words)

1946
(23542, 64)


In [12]:
#create model
model = tf.keras.models.Sequential([
  tf.keras.layers.Embedding(total_words, 100, input_length=max_sequence_len-1),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(150, return_sequences=True)),
  tf.keras.layers.Dropout(0.1),
  tf.keras.layers.LSTM(150),
  tf.keras.layers.Dense(total_words/2, activation='relu'),
  tf.keras.layers.Dense(total_words, activation='softmax')]
)

#compile and print model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 63, 100)           194600    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 63, 300)           301200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 63, 300)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 150)               270600    
_________________________________________________________________
dense_2 (Dense)              (None, 973)               146923    
_________________________________________________________________
dense_3 (Dense)              (None, 1946)              1895404   
Total params: 2,808,727
Trainable params: 2,808,727
Non-trainable params: 0
____________________________________________

In [13]:
#use callback for plus 90% accuracy and start training. It took about 3 hours of training to get 85% accuracy
class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs={}):
    if logs.get('acc')>0.9:
      self.model.stop_training=True

myCallback = myCallback()
history = model.fit(predictors, label, epochs=100, verbose=1, callbacks=[myCallback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
  352/23542 [..............................] - ETA: 4:10 - loss: 0.4828 - acc: 0.8608

KeyboardInterrupt: ignored

In [0]:
#uncomment to save the model
#model.save('bibleModel.h5')

In [16]:
#generate text
seed_text = "He gives power to the weak and strength to the powerless"
next_words = 100
print(max_sequence_len)
for _ in range(next_words):
  token_list = tokenizer.texts_to_sequences([seed_text])[0]
  token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
  predicted = model.predict_classes(token_list, verbose=0)
  output_word = ""
  for word, index in tokenizer.word_index.items():
    if index == predicted:
      output_word = word
      break
  seed_text += " " + output_word
print(seed_text)

64
He gives power to the weak and strength to the powerless sheep and go and now lest he put forth his trained men born in his house three hundred and eighteen and pursued as far against child for now i know that thou fearest unto her thou art a fair woman to look upon the death of the child and she sat over against him and lifted up her voice and wept at the field and breathed with him at the land of canaan even with them saying he hath heard the voice of whence god hath seen mine affliction and the labor of my hands and rebuked thee yesternight and


In [19]:
#import os
#print(os.getcwd())
#print(os.listdir('./checkpoints'))
#print('-------')
#print(os.listdir('.'))


/content
['biblegc.data-00001-of-00002', 'checkpoint', 'biblegc.index', 'biblegc.data-00000-of-00002']
-------
['.config', 't_asv.csv', 'checkpoints', 'bibleModel.h5', 'sample_data']


In [0]:
#uncomment to download model. Only works if the model was previously saved

files.download('bibleModel.h5')