In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

# Notebook-wide pandas display settings: show up to 100 columns and 100 rows,
# and print floats with 2 digits after the decimal point.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.options.display.precision = 2

#plt.style.use('dark_background')

# Read file

In [4]:
# Load the whole corpus into memory as one string (read mode is the default).
with open('John.txt', encoding='utf-8') as corpus_file:
    text_raw = corpus_file.read()

# Show the first 1000 characters as a quick sanity check of the raw format.
print(text_raw[:1000])

[1] In the beginning was the Word, and the Word was with God, and the Word was God.
[2] The same was in the beginning with God.
[3] All things were made by him; and without him was not any thing made that was made.
[4] In him was life; and the life was the light of men.
[5] And the light shineth in darkness; and the darkness comprehended it not.
[6] There was a man sent from God, whose name was John.
[7] The same came for a witness, to bear witness of the Light, that all men through him might believe.
[8] He was not that Light, but was sent to bear witness of that Light.
[9] That was the true Light, which lighteth every man that cometh into the world.
[10] He was in the world, and the world was made by him, and the world knew him not.
[11] He came unto his own, and his own received him not.
[12] But as many as received him, to them gave he power to become the sons of God, even to them that believe on his name:
[13] Which were born, not of blood, nor of the will of the flesh, nor of the

# Clean text

In [15]:
# Normalize the raw text into a lower-case stream of single-space-separated
# tokens in which '.', ',' and ';' stand as tokens of their own.
#
# The substitutions are applied strictly in the order listed. Whitespace is
# collapsed before the punctuation rules because those rules match a literal
# trailing space, and ':' is stripped before the 'john<digits>' rule runs.
cleaning_steps = [
    (r'\[\d+\]', ''),     # bracketed verse numbers such as "[12]"
    (r'\s+', ' '),        # newlines and runs of whitespace -> one space
    (r'[:()!?]', ''),     # punctuation that is dropped outright
    (r'\. ', ' . '),      # split '.', ',' and ';' off the preceding word
    (r', ', ' , '),
    (r'; ', ' ; '),
    (r'john\d+', ''),     # 'john' immediately followed by digits
                          # (presumably chapter markers - verify against the file)
    # The deletions above run after the whitespace collapse, so removing a
    # token that sat between two spaces leaves a double space. Collapse again
    # so that splitting the text on ' ' never produces empty-string words.
    (r' {2,}', ' '),
]

text = text_raw.lower()
for pattern, replacement in cleaning_steps:
    text = re.sub(pattern, replacement, text)
text = text.strip()

print(text[:1000])

in the beginning was the word , and the word was with god , and the word was god . the same was in the beginning with god . all things were made by him ; and without him was not any thing made that was made . in him was life ; and the life was the light of men . and the light shineth in darkness ; and the darkness comprehended it not . there was a man sent from god , whose name was john . the same came for a witness , to bear witness of the light , that all men through him might believe . he was not that light , but was sent to bear witness of that light . that was the true light , which lighteth every man that cometh into the world . he was in the world , and the world was made by him , and the world knew him not . he came unto his own , and his own received him not . but as many as received him , to them gave he power to become the sons of god , even to them that believe on his name which were born , not of blood , nor of the will of the flesh , nor of the will of man , but of god . 

# Encode text

In [41]:
gram_len = 10  # number of words per gram; each distinct gram is one chain state

# split() with no argument never yields empty strings, whereas split(' ')
# produces a '' token for every extra space in a run of spaces.
words = np.array(text.split())

# Fail early with a clear message instead of building an empty gram array
# that only breaks in later cells.
if len(words) < gram_len:
    raise ValueError(
        f"text has {len(words)} words, need at least gram_len={gram_len}"
    )

# Slide a window of gram_len words over the text one word at a time, so that
# consecutive grams overlap in gram_len - 1 words.
grams = np.array([
    " ".join(words[i:i + gram_len])
    for i in range(len(words) - gram_len + 1)
])

# Sorted array of the distinct grams.
vocab = np.unique(grams)
vocab[:20]

array([', a bone of him shall not be broken .',
       ', a feast of the jews , was nigh .',
       ', a little while , and ye shall not see',
       ', a little while , and ye shall see me',
       ', a little while we cannot tell what he saith',
       ', a man can receive nothing , except it be',
       ', a man that hath told you the truth ,',
       ', a man that is called jesus made clay ,',
       ', a multitude being in that place . afterward jesus',
       ', a ruler of the jews the same came to',
       ', a stone . the day following jesus would go',
       ', about an hundred pound weight . then took they',
       ', about fifteen furlongs off and many of the jews',
       ', abraham is our father . jesus saith unto them',
       ', after me cometh a man which is preferred before',
       ', after that he was risen from the dead .',
       ', after that the lord had given thanks when the',
       ', after the manner of the purifying of the jews',
       ', all men will belie

In [42]:
from sklearn.preprocessing import LabelEncoder

# Sizes used by the following cells: number of grams in the text (with
# repeats) and number of distinct grams.
n_grams = len(grams)
n_vocab = len(vocab)

# Fit the encoder on the distinct grams, then express both the vocabulary
# and the full gram sequence as integer codes.
LE = LabelEncoder()
LE.fit(vocab)
vocab_enc = LE.transform(vocab).astype(int)
grams_enc = LE.transform(grams).astype(int)

# Pair each gram with its code and tally how often each pair occurs.
df = pd.DataFrame(dict(grams=grams, grams_enc=grams_enc))
df.value_counts()

grams                                                      grams_enc
. verily , verily , i say unto you ,                       2618         9
, verily , verily , i say unto you ,                       1611         8
verily , verily , i say unto you , he                      19901        5
, verily , i say unto you , he that                        1597         5
them , verily , verily , i say unto you                    17846        4
                                                                       ..
friend of the bridegroom , which standeth and heareth him  7295         1
friend lazarus sleepeth ; but i go , that i                7294         1
free jesus answered them , verily , verily , i             7293         1
free indeed . i know that ye are abraham's seed            7292         1
zebedee , and two other of his disciples . simon           21845        1
Name: count, Length: 21846, dtype: int64

# Count transitions between consecutive grams (unnormalized kernel)

In [43]:
# P[j, k] holds the number of times the gram with code j is immediately
# followed by the gram with code k in the text. It is a dense
# (n_vocab, n_vocab) float array, so memory grows quadratically with the
# vocabulary size.
P = np.zeros((n_vocab, n_vocab))

# Pair every gram code with its successor. np.add.at is unbuffered, so a
# (j, k) pair that occurs several times is counted once per occurrence.
np.add.at(P, (grams_enc[:-1], grams_enc[1:]), 1)

# Normalize kernel rows so they sum to 1

In [44]:
# Convert counts to transition probabilities by dividing each row by its
# total, in place. Rows whose total is zero are skipped and stay all-zero.
# (A disabled experiment squared each row's counts before normalizing.)
row_totals = P.sum(axis=1, keepdims=True)
np.divide(P, row_totals, out=P, where=row_totals > 0);

# Generate new text

In [45]:
nStep = 100  # maximum number of transitions to sample

# Start the chain from the first gram of the text.
start = df.grams[0]
start_enc = LE.transform([start])[0]

chain_enc = [start_enc]
for step in range(nStep):
    probs = P[chain_enc[-1], :]
    # A gram that is never followed by another gram (e.g. the final gram of
    # the text when it occurs only once) has an all-zero row. Sampling from
    # it would make np.random.choice raise "probabilities do not sum to 1",
    # so end the chain there instead.
    if not probs.any():
        break
    chain_enc.append(np.random.choice(vocab_enc, p=probs))
chain_enc = np.array(chain_enc)
chain = LE.inverse_transform(chain_enc)

# Transitions only exist between grams that were adjacent in the text, and
# adjacent grams overlap in all but one word. So emit the first word of every
# gram, then the remaining words of the last gram.
first_words = [gram.split(" ")[0] for gram in chain]
last_tail = chain[-1].split(" ")[1:]
text_gen = " ".join(first_words) + " " + " ".join(last_tail)
print(text_gen)

in the beginning was the word , and the word was with god , and the word was god . the same was in the beginning with god . all things were made by him ; and without him was not any thing made that was made . in him was life ; and the life was the light of men . and the light shineth in darkness ; and the darkness comprehended it not . there was a man sent from god , whose name was john . the same came for a witness , to bear witness of the light , that all men through him might believe
