In [4]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

### Reading all the novels by Leo Tosltoy

In [9]:
txt = []
# reading war and peace
with open("wnp.txt","r") as f:
    for line in f:
        line = line.strip()
        if line == '----------': break
        if line != '':txt.append(line)
# reading anna karenina
with open("ak.txt","r") as f:
    for line in f:
        line = line.strip()
        if line == '----------': break
        if line != '':txt.append(line)
# reading ressurection
with open("res.txt","r") as f:
    for line in f:
        line = line.strip()
        if line == '----------': break
        if line != '':txt.append(line)

print("Books Read Sucessfully!")
print("length of txt = ",len(txt));

Books Read Sucessfully!
length of txt =  98493


## Cleaning the text

In [10]:
cleaned_txt = []
for line in txt:
    line = line.lower()
    line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
    tokens = word_tokenize(line)
    words = [word for word in tokens if word.isalpha()]
    cleaned_txt+=words
print("number of words = ", len(cleaned_txt))

number of words =  1088549


## Creating the markov model
Few key observations:
1. Our model will store the transition probabilites from current state to next state.
2. Instead of using single word state, we can produce multiple word state to provide some context to the model.
3. Probability from a state s to a state e can be calculated as `total number of edges from s to e / total outdegree of s`



In [11]:
def make_markov_model(cleaned_stories, n_gram=2):
    markov_model = {}
    for i in range(len(cleaned_stories)-n_gram-1):
        curr_state, next_state = "", ""
        for j in range(n_gram):
            curr_state += cleaned_stories[i+j] + " "
            next_state += cleaned_stories[i+j+n_gram] + " "
        curr_state = curr_state[:-1]
        next_state = next_state[:-1]
        if curr_state not in markov_model:
            markov_model[curr_state] = {}
            markov_model[curr_state][next_state] = 1
        else:
            if next_state in markov_model[curr_state]:
                markov_model[curr_state][next_state] += 1
            else:
                markov_model[curr_state][next_state] = 1
    
    # calculating transition probabilities
    for curr_state, transition in markov_model.items():
        total = sum(transition.values())
        for state, count in transition.items():
            markov_model[curr_state][state] = count/total
        
    return markov_model

In [12]:
markov_model = make_markov_model(cleaned_txt)

In [13]:
print("number of states = ", len(markov_model.keys()))

number of states =  320713


In [22]:
print("All possible transitions from 'world is' state: \n")
print(markov_model['world is'])

All possible transitions from 'world is' state: 

{'not right': 0.1, 'the talk': 0.1, 'so opposed': 0.1, 'now for': 0.1, 'terrible he': 0.1, 'well known': 0.1, 'done by': 0.1, 'base and': 0.1, 'evil he': 0.1, 'difficult i': 0.1}


In [18]:
def generate_story(markov_model, limit=100, start='there is'):
    n = 0
    curr_state = start
    next_state = None
    story = ""
    story+=curr_state+" "
    while n<limit:
        next_state = random.choices(list(markov_model[curr_state].keys()),
                                    list(markov_model[curr_state].values()))
        
        curr_state = next_state[0]
        story+=curr_state+" "
        n+=1
    return story

In [21]:
for i in range(20):
    print(str(i)+". ", generate_story(markov_model,  limit=6, start="it was"))

0.  it was a civil service uniform he wore a short time would destroy half 
1.  it was clear that she saw nothing but far down and deep dip making 
2.  it was all up when they wish to help the convicts who had arrived 
3.  it was known that you wished it the count was not angry with me 
4.  it was as if urging each other on her breast and to have arguments 
5.  it was an unsurpassable morning for hunting it was as musical as natasha and 
6.  it was not this spring morning men thought rostov scarcely believing his eyes can 
7.  it was that having made her dreadfully miserable and that the sick prisoners as 
8.  it was impossible to make out what was left of the road for familiar 
9.  it was true that it doesnt matter what about and talk dressmakers came again 
10.  it was necessary to throw this bone a bill for roubles and offered her 
11.  it was a swindle and all the rostov party spent the night was finished 
12.  it was to her husband prince andrews eyes speranski was the son as his 