In [5]:
import os
import re
import string
import random

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

## Reading every Sherlock Holmes book!

In [4]:
def read_all_stories(path: str):
    txt = []
    for _, _, files in os.walk(path):
        for file in files:
            with open(path+file) as f:
                for line in f:
                    line = line.strip()
                    if line == '----------': 
                        break
                    if line != '': 
                        txt.append(line)
    return txt

story_path = "./sherlock/"
stories = read_all_stories(story_path)
print("Number of lines: ", len(stories))

Number of lines =  215021


## Cleaning text

In [7]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\']", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt += words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("Number of words: ", len(cleaned_stories))

Number of words:  2332110


## Creating the Markov Model

In [37]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories) - 2*n_gram+1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        elif next_state in markov_model[curr_state]:
            markov_model[curr_state][next_state] += 1
        else:
            markov_model[curr_state][next_state] = 1
            
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
            
    return markov_model

In [38]:
markov_model = make_markov_model(cleaned_stories)

In [39]:
print("Number of state: ", len(markov_model.keys()))

Number of state:  208670


In [40]:
print("All possible transitions from 'the game' state:\n")
print(markov_model["the game"])

All possible transitions from 'the game' state:

{'your letter': 0.02702702702702703, 'was up': 0.09009009009009009, 'is afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'was in': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.0270270270270270

## Generating Sherlock Holmes stories!

In [41]:
def generate_story(markov_model: dict, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = curr_state + " "
    
    while n < limit:
        txt_population = list(markov_model[curr_state].keys())
        txt_probability = list(markov_model[curr_state].values())
        next_state = random.choices(txt_population,
                                    txt_probability)
        curr_state = next_state[0]
        story += curr_state + " "
        n += 1
    return story

In [47]:
for i in range(20):
    print(str(i) + ". ", generate_story(markov_model, start='dear holmes', limit=8))

0.  dear holmes that i who have occasionally hung about the eyes and a straight mouth set in a 
1.  dear holmes and tell me all the way into his business i am not mistaken i hear him 
2.  dear holmes and tell me all that he might use it in my leisure have hired a second 
3.  dear holmes i exclaimed devoutly but you were left in the grip of his brush i can tell 
4.  dear holmes i exclaimed turning to my companion i am glad of your advice you have told us 
5.  dear holmes i fear there is his footstep well mr holmes when he had gathered round him in 
6.  dear holmes it is ask for a photograph of my neighbours three days ago did you not that 
7.  dear holmes i fear that even had it all in bed he told me your name to the 
8.  dear holmes i have so much during some recent researches which have been returned until the end of 
9.  dear holmes and tell us what do you think of baxters words and say offhand how each factor 
10.  dear holmes said i ruefully pointing to a glass case in a gesture of

In [44]:
for i in range(20):
    print(str(i) + ". ", generate_story(markov_model, start='my dear', limit=8))

0.  my dear watson and if my friend here you will need your deductions mr holmes to tell them 
1.  my dear mr mac when one of the nights adventures we had taken tea at the end of 
2.  my dear watson professor moriarty is not a vision and a touch a distinct smell of powder the 
3.  my dear fellow i cried approaching him stand back stand right back said he i expect that we 
4.  my dear doctor this very hour and place but otherwise for half an hour barker and mrs douglas 
5.  my dear watson but this is incredible mr holmes why this article i took it i do not 
6.  my dear holmes you would hardly forget it or remember only as i understand it has turned out 
7.  my dear watson which he handed it back the young lady resumed her seat with an old of 
8.  my dear holmes my friend had produced results which had become even sadder and more grave i can 
9.  my dear watson with the i think that if one of your investigations the conviction that every one 
10.  my dear watson do you remember that our 

In [45]:
for i in range(20):
    print(str(i) + ". ", generate_story(markov_model, start='i would', limit=8))

0.  i would offer you a beggarly sum which i offer you a grim little bit of work as 
1.  i would make you a free agent i understand that you give me carte blanche to act for 
2.  i would see you mr mason you can not serve him better manners if i caught a glimpse 
3.  i would come to close grips at last with his usual listless expression while small sat stolidly opposite 
4.  i would at once to the division of the treasure then she would meet the facts as they 
5.  i would be glad if you were asked to prove the connection between godfrey staunton and was only 
6.  i would take a pinch of snuff pray continue your most interesting said sherlock holmes rather sternly owe 
7.  i would go some day i perceive my dear holmes that i was pledged to him in all 
8.  i would appear next monday i saw the two men had come to him and that sir james 
9.  i would go said holmes as he walked for a few days at his desk pulled over the 
10.  i would have endured imprisonment ay even execution rather than m

In [46]:
print(generate_story(markov_model, start='the case'))

the case stripped of all surmise but at last he forced himself through it as you perceive to hunt down the traitor is dead then she would meet me no sir i really could not i it was not difficult for me to see holmes and the seaman saying that he had curled down with his infernal paint and to spite him and to fasten a charge of the murder of john openshaw were never to speak of this said he in his blandest manner to us especially when he came he pushed his spectacles up on his overcoat and bustled about in a few words of apology i hope to goodness the house we could but lay our hands i should never be happy until you had left me and entered his appearance you see it watson said holmes shutting his eyes pray go on biding with us a visit to bohemia or either of your feelings i am willing to act for all so that none who had secrets in this room for a moment in rushed the husband with his intended crime and could know time was spent at hampstead and that it was a small tin which stood 
