In [1]:
!pip install nltk
!pip install spacy
!pip install markovify
!python -m spacy download en_core_web_sm

Collecting markovify
  Downloading markovify-0.9.4.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting unidecode (from markovify)
  Downloading Unidecode-1.3.8-py3-none-any.whl.metadata (13 kB)
Downloading Unidecode-1.3.8-py3-none-any.whl (235 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: markovify
  Building wheel for markovify (setup.py) ... [?25l[?25hdone
  Created wheel for markovify: filename=markovify-0.9.4-py3-none-any.whl size=18607 sha256=3bdbc6b4acb1016337fb3af306acd21306ded3e26183a36d25c08d7940e6994b
  Stored in directory: /root/.cache/pip/wheels/ca/8c/c5/41413e24c484f883a100c63ca7b3b0362b7c6f6eb6d7c9cc7f
Successfully built markovify
Installing collected packages: unidecode, markovify
Successfully installed markovify-0.9.4 unidecode-1.3.8
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-model

In [2]:
import random
import re

import markovify
import nltk
import spacy
from nltk.corpus import gutenberg

In [3]:
# Fetch the NLTK 'gutenberg' corpus package that the `gutenberg` reader imported above reads from
nltk.download('gutenberg')

[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.


True

In [4]:
# List the file ids available in the corpus so the Shakespeare texts can be picked out
print(gutenberg.fileids())

['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt', 'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt', 'chesterton-brown.txt', 'chesterton-thursday.txt', 'edgeworth-parents.txt', 'melville-moby_dick.txt', 'milton-paradise.txt', 'shakespeare-caesar.txt', 'shakespeare-hamlet.txt', 'shakespeare-macbeth.txt', 'whitman-leaves.txt']


In [5]:
# Load the three Shakespeare plays from the Gutenberg corpus as raw strings
hamlet, macbeth, caesar = map(
    gutenberg.raw,
    ('shakespeare-hamlet.txt', 'shakespeare-macbeth.txt', 'shakespeare-caesar.txt'),
)

In [6]:
# Preview the opening 100 characters of each raw play
for _heading, _raw_text in (
    ('\nRaw Hamlet:\n', hamlet),
    ('\nRaw Macbeth:\n', macbeth),
    ('\nRaw Caesar:\n', caesar),
):
    print(_heading, _raw_text[:100])


Raw Hamlet:
 [The Tragedie of Hamlet by William Shakespeare 1599]


Actus Primus. Scoena Prima.

Enter Barnardo a

Raw Macbeth:
 [The Tragedie of Macbeth by William Shakespeare 1603]


Actus Primus. Scoena Prima.

Thunder and Lig

Raw Caesar:
 [The Tragedie of Julius Caesar by William Shakespeare 1599]


Actus Primus. Scoena Prima.

Enter Fla


In [7]:
# Utility function for text cleaning
def text_cleaner(text):
    """Strip markup noise from raw corpus text and normalise its whitespace.

    The substitutions run in order:
      1. every ``--`` becomes a single space;
      2. square-bracketed spans are deleted (non-greedy; ``.`` does not cross
         newlines, so only brackets that open and close on one line match);
      3. digit runs / decimals matched by the number pattern are deleted,
         together with any leading whitespace or minus sign the pattern captures.
    Finally all whitespace runs collapse to one space and the ends are trimmed.

    Args:
        text: the raw string to clean.

    Returns:
        The cleaned, single-spaced string.
    """
    substitutions = (
        (r'--', ' '),
        (r'\[.*?\]', ''),
        (r'(\b|\s+\-?|^\-?)(\d+|\d*\.\d+)\b', ''),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    # split() with no argument drops leading/trailing whitespace and splits on any run
    return ' '.join(cleaned.split())

In [8]:
# Clean each play: drop any "Chapter N" markers, then apply the shared text cleaner
hamlet, macbeth, caesar = (
    text_cleaner(re.sub(r'Chapter \d+', '', raw_play))
    for raw_play in (hamlet, macbeth, caesar)
)

In [9]:
# Load the spaCy 'en_core_web_sm' pipeline downloaded in the first cell; `nlp` is
# reused below both for sentence segmentation and for POS tagging
nlp = spacy.load('en_core_web_sm')

In [10]:
# Run the spaCy pipeline over each cleaned play
hamlet_doc, macbeth_doc, caesar_doc = (
    nlp(cleaned_play) for cleaned_play in (hamlet, macbeth, caesar)
)

In [11]:
# Rebuild each play as one string of its spaCy-detected sentences,
# skipping sentences whose text is a single character or empty
hamlet_sents, macbeth_sents, caesar_sents = (
    ' '.join(span.text for span in doc.sents if len(span.text) > 1)
    for doc in (hamlet_doc, macbeth_doc, caesar_doc)
)

In [12]:
# Combine all sentences for Markovify model.
# Join with a space rather than bare `+`: each per-play string has no trailing
# whitespace, so plain concatenation glues the last sentence of one play directly
# onto the first sentence of the next, and a sentence splitter that looks for
# whitespace after the terminator cannot separate them.
shakespeare_sents = ' '.join([hamlet_sents, macbeth_sents, caesar_sents])
print("Combined Text Preview:\n", shakespeare_sents[:500])

Combined Text Preview:
 Actus Primus. Scoena Prima. Enter Barnardo and Francisco two Centinels. Barnardo. Who's there? Fran. Nay answer me: Stand & vnfold your selfe Bar. Long liue the King Fran. Barnardo? Bar. He Fran. You come most carefully vpon your houre Bar. 'Tis now strook twelue, get thee to bed Francisco Fran. For this releefe much thankes: 'Tis bitter cold, And I am sicke at heart Barn. Haue you had quiet Guard? Fran. Not a Mouse stirring Barn. Well, goodnight. If you do meet Horatio and Marcellus, the Riuals


In [13]:
# Create Markov chain text generator using Markovify.
# The whole combined corpus is passed as one string; state_size=2 is forwarded to
# markovify.Text (per markovify's docs, the number of words that make up a chain state).
generator_1 = markovify.Text(shakespeare_sents, state_size=2)

In [14]:
# Function to generate sentences
def generate_sentences(generator, num_sentences=3, max_chars=None):
    """Ask `generator` for sentences and return the ones it managed to produce.

    Args:
        generator: object exposing ``make_sentence()`` and
            ``make_short_sentence(max_chars)``.
        num_sentences: number of generation attempts to make.
        max_chars: when truthy, each attempt uses ``make_short_sentence(max_chars)``;
            otherwise ``make_sentence()`` is used.

    Returns:
        A list of the truthy results, in generation order. It may hold fewer than
        `num_sentences` items because falsy results (e.g. ``None``) are dropped.
    """
    def _draw():
        # One generation attempt, length-limited only when max_chars is truthy
        if max_chars:
            return generator.make_short_sentence(max_chars)
        return generator.make_sentence()

    attempts = (_draw() for _ in range(num_sentences))
    return [candidate for candidate in attempts if candidate]

In [15]:
# Seed Python's global `random` module, which markovify samples from, so this
# cell prints the same sentences on every Restart & Run All.
random.seed(42)

# Generate and print sentences
print("\nRandom Sentences:")
print("\n".join(generate_sentences(generator_1, num_sentences=3)))
print("\nRandom Short Sentences (100 chars or less):")
print("\n".join(generate_sentences(generator_1, num_sentences=3, max_chars=100)))


Random Sentences:
Be that the day confin'd to fast in Fiers, Till the last cry for?
And come your selues, & bring Messala with you all, If you giue good words Witnesse the hole you made me mad.
You know the Rendeuous: If that his Maiesty La. A kinde goodnight to all.

Random Short Sentences (100 chars or less):
That thou hast thy Father much offended Qu.
Looke heere vpon this Businesse.
You could for a Father?


In [16]:
# Using SpaCy's POS tagging to improve the generator
class POSifiedText(markovify.Text):
    """markovify.Text subclass whose words carry a part-of-speech tag.

    Each word is represented as ``<orth>::<pos>`` when splitting and the tag is
    stripped off again when the words are joined back into a sentence.
    """

    def word_split(self, sentence):
        """Tokenise `sentence` with the module-level `nlp` and return ``orth::pos`` strings."""
        return [token.orth_ + '::' + token.pos_ for token in nlp(sentence)]

    def word_join(self, words):
        """Keep the part of each word before its first ``::`` and join with single spaces."""
        return ' '.join(tagged.split('::')[0] for tagged in words)

In [17]:
# Call the POSifiedText class on the text
# (word_split runs `nlp` on each sentence, so building this model is much slower
# than building the plain markovify.Text model)
generator_2 = POSifiedText(shakespeare_sents, state_size=2)

# Seed Python's global `random` module, which markovify samples from, so this
# cell prints the same sentences each time it is run.
random.seed(42)

# Generate and print sentences using POSifiedText generator
print("\nPOSified Random Sentences:")
print("\n".join(generate_sentences(generator_2, num_sentences=5)))
print("\nPOSified Random Short Sentences (100 chars or less):")
print("\n".join(generate_sentences(generator_2, num_sentences=5, max_chars=100)))


POSified Random Sentences:
And what other Oath , Then flye to others that we know not , nor any one , that dare looke on Death it selfe in ease , and bay the Moone , Then on the torture of the truth  Decius , Metellus , and Attendants .
Why in that Caesar ?
Now as you see there  goe carry them , as to sight ?
Let him go Gertrude  Do you confesse so much vpon your patience so predominant , In State vnborne , and tell him to his Doctor  for Romans now Haue Thewes , and ouer - charg'd with double Cracks , So nightly toyles the subiect of our Affaire .
Lo you , and amaz'd my sight .

POSified Random Short Sentences (100 chars or less):
Brutus , and vnder - goe , For it must follow , And I a common Laughter , or a Carpenter ?
He hath my Lord Ham .
Hamlet , thou hast bin all this is Miching Malicho , that swear and lye ?
Thinke it no more on't , it touches vs not  But in the instant Macb .
That 's good  Rebellious dead , youl'd weepe for him haue I offer Of goodly thousands .
