In [1]:
!pip install pandas
!pip install markovify
!pip install spacy

Collecting markovify
  Downloading markovify-0.9.4-py3-none-any.whl.metadata (23 kB)
Collecting unidecode (from markovify)
  Downloading Unidecode-1.4.0-py3-none-any.whl.metadata (13 kB)
Downloading markovify-0.9.4-py3-none-any.whl (19 kB)
Downloading Unidecode-1.4.0-py3-none-any.whl (235 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.8/235.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: unidecode, markovify
Successfully installed markovify-0.9.4 unidecode-1.4.0


In [2]:
import pandas as pd
import markovify
import spacy

In [3]:
df = pd.read_csv("hamlets.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,language,text
0,1,English,The Tragedie of Hamlet\n ...
1,2,German,"Hamlet, Prinz von Dännemark.\n ..."
2,3,Portuguese,HAMLET\n DRAMA EM ...


In [5]:
print(df.columns)

Index(['Unnamed: 0', 'language', 'text'], dtype='object')


In [6]:
print(df["language"].value_counts())

language
English       1
German        1
Portuguese    1
Name: count, dtype: int64


In [7]:
english_text = df[df["language"] == "English"]["text"].iloc[0]

In [8]:
print(english_text[:500])

The Tragedie of Hamlet
                        Actus Primus Scoena Prima
       Enter Barnardo and Francisco two Centinels
                              Barnardo Whos there
                 Fran Nay answer me Stand  vnfold
                                       your selfe
                           Bar Long liue the King
                                    Fran Barnardo
                                           Bar He
     Fran You come most carefully vpon your houre
   Bar Tis now strook twelu


In [9]:
word_count = len(english_text.split())
print("Total words:", word_count)

Total words: 29049


In [10]:
import re

def basic_clean(text):
    text = re.sub(r'\s+', ' ', text)   # replace multiple spaces/newlines with single space
    return text.strip()

clean_text = basic_clean(english_text)
print(clean_text[:500])

The Tragedie of Hamlet Actus Primus Scoena Prima Enter Barnardo and Francisco two Centinels Barnardo Whos there Fran Nay answer me Stand vnfold your selfe Bar Long liue the King Fran Barnardo Bar He Fran You come most carefully vpon your houre Bar Tis now strook twelue get thee to bed F... Fran For this releefe much thankes Tis bitt... And I am sicke at heart Barn Haue you had quiet Guard Fran Not a Mouse stirring Barn Well goodnight If you do meet Horatio and Marcellus the Riuals of my Watch bi


In [11]:
def remove_headings(text):
    text = re.sub(r'Actus\s+\w+|ACT\s+\w+', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Scoena\s+\w+|SCENE\s+\w+', '', text, flags=re.IGNORECASE)
    return text

clean_text = remove_headings(clean_text)
print(clean_text[:500])

The Tragedie of Hamlet   Enter Barnardo and Francisco two Centinels Barnardo Whos there Fran Nay answer me Stand vnfold your selfe Bar Long liue the King Fran Barnardo Bar He Fran You come most carefully vpon your houre Bar Tis now strook twelue get thee to bed F... Fran For this releefe much thankes Tis bitt... And I am sicke at heart Barn Haue you had quiet Guard Fran Not a Mouse stirring Barn Well goodnight If you do meet Horatio and Marcellus the Riuals of my Watch bid them make... Enter Hor


In [12]:
def remove_stage_directions(text):
    text = re.sub(r'Enter\s+[A-Za-z\s]+', '', text)
    text = re.sub(r'Exit\s+[A-Za-z\s]+', '', text)
    return text

clean_text = remove_stage_directions(clean_text)

In [13]:
def remove_speaker_labels(text):
    text = re.sub(r'\b[A-Z][a-z]+\:', '', text)
    return text

clean_text = remove_speaker_labels(clean_text)

In [14]:
clean_text = re.sub(r'\s+', ' ', clean_text).strip()
print(clean_text[:500])

The Tragedie of Hamlet ... Fran For this releefe much thankes Tis bitt... And I am sicke at heart Barn Haue you had quiet Guard Fran Not a Mouse stirring Barn Well goodnight If you do meet Horatio and Marcellus the Riuals of my Watch bid them make... ... Fra Barnardo has my place giue you goodnight ... Bar I haue seene nothing Mar Horatio saies tis but our Fantasie And will not let beleefe take hold of him Touching this dreaded sight twice seene of vs Therefore I haue intreated him along With vs


In [15]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m67.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [16]:
nlp = spacy.load("en_core_web_sm")
doc = nlp(clean_text)

In [17]:
sentences = [sent.text.strip() for sent in doc.sents if len(sent.text.strip()) > 1]

In [18]:
for i in range(5):
    print(sentences[i])

The Tragedie of Hamlet ...
Fran For this releefe much thankes Tis bitt...
And I am sicke at heart Barn
Haue
you had quiet Guard Fran Not a Mouse stirring Barn


In [19]:
markov_text = " ".join(sentences)

In [20]:
print(markov_text[:500])

The Tragedie of Hamlet ... Fran For this releefe much thankes Tis bitt... And I am sicke at heart Barn Haue you had quiet Guard Fran Not a Mouse stirring Barn Well goodnight If you do meet Horatio and Marcellus the Riuals of my Watch bid them make... ... Fra Barnardo has my place giue you goodnight ... Bar I haue seene nothing Mar Horatio saies tis but our Fantasie And will not let beleefe take hold of him Touching this dreaded sight twice seene of vs Therefore I haue intreated him along With vs


In [21]:
print("Total sentences:", len(sentences))

Total sentences: 1327


In [22]:
print("Total words:", len(markov_text.split()))

Total words: 24546


In [23]:
text_model = markovify.Text(markov_text, state_size=2)

In [24]:
for i in range(5):
    print(text_model.make_sentence())

Rosin Good my Lord Assure you my Lord Ham Betweene who Pol I heare him coming ...
Ore which his Melancholly sits on brood And I am sorrie that with Deuotions visage And pious Action we do determine oft we breake Purpose is but scratcht withall Ile touch my point With this Slaues Offall bloudy a Bawdy villaine Remorselesse Treacherous Letcherous kindles vi...
Thoughts Laer A Norman Laer Vpon my secure hower thy Vncle stole With iuyce of cursed Hebenon in a Celestiall bed prey o...
Be soft as sinewes of the Faction that is the reason that you haue it not thinkst thee stand me now vpon He that playes the King shall be so as so tis put on those shall praise your excellence And set a double varnish on the way of friendship What make you readie ...
In noise so rude against me Ham Giue me that and vnyoake Other Marry now I must tell vs this Ham A dreame it selfe in fearing to be disioynt and out of this pursie times Vertue it selfe It waues me forth againe Ile follow thee Mar You shall not la

In [25]:
for i in range(5):
    print(text_model.make_short_sentence(max_chars=100))

Houses that he meanes No more ...
Polonius Laertes and his Sandal sh...
Ham Whose was it said Tis meete that some more audience then a foule and pestilent congregation...
Theeues of Mercy but they witherd all when my Fath...
King We doubt it is no shuffling there the Action with this speciall obse...


In [26]:
model_1 = markovify.Text(markov_text, state_size=1)
print(model_1.make_sentence())

Coach Goodnight Ladies Goodnight Ladies Goodnight goodnight And with the gate of fine together heere vpon the mutines in excrements Start vp King that thou digge without an enuious sliuer broke When I will Osr Shall I leaue to mee the sound it Kin Tis a kinde Thus it now Your wisedome best violence of meeting Betweene the matter in satisfaction But that he... freely gone he is very cunning of meeting Betweene the King So Lust The naturall Magicke and begge leaue to tarre them...


In [27]:
model_3 = markovify.Text(markov_text, state_size=3)
print(model_3.make_sentence())

None


In [28]:
def generate_sentence(model):
    sentence = None
    while sentence is None:
        sentence = model.make_sentence()
    return sentence

for i in range(5):
    print(generate_sentence(text_model))

Abridgements come ... beard me in the Gods Pol Looke where he has laid a great mans Memorie may outliue his life halfe ...
Much heate and him hee drownes not himselfe dos wrong Laertes Then Hamlet does it well it does well to commend it himselfe there are many Co...
That would not let the world the Parragon of Animals and yet within a month Let me question more in particular wh... you my Sonne you haue s...
... they aske you what it meanes say you by the Soueraigne power you haue it thus had he beene put on by a most pittifull Ambition in the Porches of mine eye By Heauen thy madnesse shall be so We pray you go to bed Pinch Wanton on your ...
Burne out the Law but tis not strange for mine owne Ambition and my hopes Throwne out his Angle for my Soule consent ...


In [29]:
def generate_clean_sentence(model, max_chars=140, tries=20):
    for _ in range(tries):
        sentence = model.make_short_sentence(
            max_chars=max_chars,
            tries=tries
        )
        if sentence:
            return sentence
    return "No valid sentence generated."

In [30]:
for _ in range(5):
    print(generate_clean_sentence(text_model))

King Pretty Ophelia Ophe Indeed my Lord Ham Why looke you mock him not My good Friends thus wide Ile ope my...
All Longer longer Hor Not when I did verie well note him ...
Be you and Columbines ...
Kibe How long will a man might play But I do prophesie th election lights On Fortinbras he has my place giue you goodnight ...
Ham O my deere Lord Ham Marry this is wondrous s...


In [31]:
class POSifiedText(markovify.Text):
    def word_split(self, sentence):
        return [
            "::".join((token.text, token.pos_))
            for token in nlp(sentence)
        ]

    def word_join(self, words):
        return " ".join(word.split("::")[0] for word in words)

In [32]:
pos_model = POSifiedText(markov_text, state_size=2)

In [33]:
for _ in range(5):
    print(generate_clean_sentence(pos_model))

King Welcome deere Rosincrance and gentle Rosincr ...
... if you finde him forward to be spilt ...
Ham No faith not a iot more my Lord put your discourse into ...
To th Ambassadors My Newes shall be welcom ...
And that shall lend a kinde of easinesse To the vnsatisfied Hor Neuer beleeue it I haue a daughter haue whilst she is and therefore make ...


In [34]:
def preprocess_text(raw_text):
    text = re.sub(r'\s+', ' ', raw_text)
    text = re.sub(r'Actus\s+\w+|Scoena\s+\w+', '', text, flags=re.IGNORECASE)
    text = re.sub(r'Enter\s+[A-Za-z\s]+', '', text)
    return text.strip()

In [35]:
def sentence_tokenize(text, nlp):
    doc = nlp(text)
    return " ".join(sent.text for sent in doc.sents if len(sent.text) > 1)

In [36]:
def build_markov_model(text, state_size=2, pos=False):
    if pos:
        return POSifiedText(text, state_size=state_size)
    return markovify.Text(text, state_size=state_size)

In [37]:
cleaned = preprocess_text(english_text)
markov_ready = sentence_tokenize(cleaned, nlp)

model = build_markov_model(markov_ready, state_size=2, pos=True)

for _ in range(5):
    print(generate_clean_sentence(model))

Tempest and as you did command I did verie well note him ...
March Sound a Flourish Ham They are not Ham How strangely Clo Faith eene with loosing his wits hath Traitorous gu ...
Will so bestow our selues Therefore our sometimes Sister now our Queene Th imperiall Ioyntresse of this world Now could I drink hot ...
For tis a question left vs yet to me Polon Affection puh You speake like a green ...
Ham I thinke you did command I did verie well note him ...
