# Sherlock Holmes Adventure Generation

In [2]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [3]:
story_path = "/Users/almag/Documents/AI/sherlock/sherlock/"

def read_all_stories(story_path):
    txt = []
    for _, _, files in os.walk(story_path):
        for file in files:
            with open(story_path + file) as f:
                for line in f:
                    line = line.strip()
                    if line=='----------': break
                    if line!='':txt.append(line)
    return txt
        
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))


number of lines =  215021


## Cleaning the text

In [4]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332110


## Creating Markov Model


In [5]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [6]:
markov_model = make_markov_model(cleaned_stories)

In [7]:
print("number of states = ", len(markov_model.keys()))

number of states =  208670


In [8]:
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'your letter': 0.02702702702702703, 'was up': 0.09009009009009009, 'is afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'was in': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'would have': 0.036036036036036036, 'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'in their': 0.036036036036036036, 'was whist': 0.036036036036036036, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'was afoot': 0.036036036036036036, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.027027027027027

In [9]:
def generate_story(markov_model, limit=100, start='my god'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [10]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="dear holmes", limit=8))
    

0.  dear holmes that i do not know that you felt we then ascended to the room their first 
1.  dear holmes he has had no friends who would call me a check for every hour may be 
2.  dear holmes i fear that it was indeed a weird figure as swift and active as a squirrel 
3.  dear holmes am i then you can be no doubt but still glared at the glittering handcuffs which 
4.  dear holmes you are a friend for whom we must have one more specimen of the corridor and 
5.  dear holmes said i as i have this money or valuables in the room after eleven oclock i 
6.  dear holmes i have never seen a man with a at the door and the spokesman of the 
7.  dear holmes oh yes here you are mrs oakshott brixton road read holmes quite simple my dear watson 
8.  dear holmes if i thought you londoners were never at fault you dont seem to give you any 
9.  dear holmes what do you think then sir that i had his answer hilton cubitt sent me the 
10.  dear holmes he has a grudge against me id be glad to enjoy the amen

In [11]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="my dear", limit=8))

0.  my dear watson said holmes with a yawn what happened i would lay a whip across your shoulders 
1.  my dear watson there are no lengths to which this document disappeared that can befall a woman we 
2.  my dear mycroft the chain is almost complete you have not you but if we can not let 
3.  my dear watson theres genius in that for years i have had one or two which i was 
4.  my dear watson that something happened a week when i glanced out of my limbs as a relic 
5.  my dear fellow for heavens sake mr holmes said he that circle is drawn at a glance what 
6.  my dear sir dont look so impatient for i assure you no more to be done but there 
7.  my dear fellow there is no easy matter to know it was like a great metallic heart her 
8.  my dear doctor this is a further point however which i merely called to say that my practice 
9.  my dear watson the man himself at his watch i expect developments watson when you know because there 
10.  my dear holmes i cried this is impossible before i 

In [12]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model, start="i would", limit=8))

0.  i would have it too in short watson it is k k k said i he thought for 
1.  i would go i am the pupil and you are in your own mind a dying and delirious 
2.  i would call your attention very particularly to two points which worried young edmunds of the pair the 
3.  i would rather die under my roof and it is not for me to be something important which 
4.  i would risk a little sporting flutter that you dont do much soldiering however i had done and 
5.  i would not have his cursed face thats my price im with you or im mistaken leave it 
6.  i would not have telegraphed to the london press has not had a peaceful hour since whoever took 
7.  i would bring him the papers presuming that it is settled then the longer i stay here alone 
8.  i would say nothing of the second stain the adventure of charles the first to spring to his 
9.  i would do nothing with him on eclipses how the matter stands i realize that your refusal is 
10.  i would take this photograph of a woman beware beware ho

In [13]:
print(generate_story(markov_model, start="the case", limit=100))

the case to this trip watson by no means advance but three or four days you have urgent demands what would you have the plans themselves back in baker street a complete specimen of an absolutely blank mind which is it i cried rubbing my hands before his impending wedding to miss cushing of cross street where miss cushing resided it was all up the boxer not a livin soul sir nor was it well it might be some reason the treaty had reached a shrubbery which lies at the band of white shells were strung round the curve of the upper floor of the house agents you know and they said that mr douglas had only his duty towards intrusive strangers who are you though you remember hilton cubitt please continue your most interesting narrative just a little lower down was a long dismal walk the yew alley though not marked under that roof is under my ulster after all i wanted i slowed down his then yes it is all that is right now now read it to the post nor will they to the best it was the sheet upon whi