<img src="https://drive.google.com/uc?export=view&amp;id=1zSJwAUxWv5bxyYLmYPNi-s6M_Wq5iWXh">

## Importing tools

In [1]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [2]:
story_path = "/kaggle/input/sherlock-holmes-stories/sherlock/sherlock/"

def read_all_stories(story_path):
    txt = []
    for _, _, files in os.walk(story_path):
        for file in files:
            with open(story_path+file) as f:
                for line in f:
                    line = line.strip()
                    if line=='----------': break
                    if line!='':txt.append(line)
    return txt
        
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

number of lines =  215021


## Cleaning the text

In [3]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [4]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [5]:
markov_model = make_markov_model(cleaned_stories)

In [6]:
print("number of states = ", len(markov_model.keys()))

number of states =  208716


In [7]:
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'was in': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'was up': 0.09009009009009009, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'is afoot': 0.036036036036036036, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.02702702702702703, 'your letter': 0.027027027027027

## Generating Sherlock Holmes stories!

In [8]:
def generate_story(markov_model, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [9]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))

0.  dear holmes i ejaculated no for my steve you are lost nothing but energy can save me only 
1.  dear holmes it is unthinkable and that these marks were slight there is no use then its up 
2.  dear holmes i ejaculated surely said i the gentleman at this time i was glad enough to agree 
3.  dear holmes i have been as keen as the weeks passed and yet now that i would prove 
4.  dear holmes my previous letters and papers upon this table and chair will you have the drop on 
5.  dear holmes if i was compelled to listen to her fate or had some vague recollection of an 
6.  dear holmes i have seldom seen a more detailed account of that well its the colleen inside of 
7.  dear holmes i ejaculated as a public danger there are unexplored possibilities about you take a broader point 
8.  dear holmes i exclaimed you will give a very full accounts i was impatient with less than your 
9.  dear holmes am i to do with money and used this means of giving way in which mr 
10.  dear holmes i ejaculated

In [10]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear sir cried dr mortimer gazing at holmes and that same drebber and young stangerson as to 
1.  my dear fellow i dont think that you are not of much practical importance if a clerk of 
2.  my dear mrs smith said holmes looking up in surprise when my eye caught something which i should 
3.  my dear man said old turner i have no desire to encourage false hopes but you can imagine 
4.  my dear watson your has been done to cure her of her people but mcginty and by god 
5.  my dear boy is the history of crime sir they both asked it would be to me such 
6.  my dear fellow you see exactly what it was at home a man followed me from marseilles there 
7.  my dear watson said holmes rubbing his hands softly together and chuckled those are as clear as day 
8.  my dear watson you improve all the time of which they were very anxious to consult you upon 
9.  my dear fellow there is no murder anyhow id as soon die with a practice a small page 
10.  my dear fellow i congratulate you inspector 

In [11]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would rather die under my command and to be his own regiment the bombay infantry died upon 
1.  i would rather swing a score of cases full of gout too they say it is not i 
2.  i would give so much to suppose such a coincidence to suppose that any letter of an impulsive 
3.  i would take this myself what of coming danger nothing serious my dear watson i am expecting him 
4.  i would not do so could effect an arrest until night at covent garden indeed i could not 
5.  i would leave her long out of his language as he seemed to represent one i presume that 
6.  i would spend my last copper to shield him she had learned to imitate you and above all 
7.  i would really rather not well possibly so there are grounds round it woods on three sides by 
8.  i would i could not help suspecting that she had heard what passed between these little problems help 
9.  i would go with a few hours ago the light of a litter of pipes syringes penknives and 
10.  i would fain disbelieve stories that i

In [12]:
print(generate_story(markov_model, start="fresh illustration", limit=150))

fresh illustration is an old soldier i perceive that you you of that asked holmes with a mischievous twinkle i suppose that man phelps does not seem to have done her to receive anything through the house oh father father what shall i do not know why i think that i have been you surely do not seriously imagine that they were peculiar boots but his leavetaking was cut short by a razor or a wig i could not shake off the mans hat on which i should call it a pose is an expert singlestick player boxer and swordsman has a grievance he said he i wish to impress clients with a sense of my troubles next day you have made a vast gap between either hypothesis and the neighing of horses loud as it seems to show that mr oldacre kept himself informed of our movements they certainly confirm the strange rumours began to ascend the stairs the bang of a door suddenly flew open out into the hall both of them and you see also that there may be exposed such slips are common to both professions there is a to