In [12]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import re
import random

In [13]:
def download_book(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    text = soup.get_text(strip=True)
    return text

In [14]:
def preprocess_text(text):
    text = re.sub('[^A-Za-z0-9 .,;?!]+', '', text)
    text = re.sub('\s+', ' ', text).strip()
    return text

In [15]:
def train_markov_chain(text, order=1):
    words = text.split()
    model = {}

    for i in range(len(words) - order):
        key = tuple(words[i:i + order])
        next_word = words[i + order]

        if key not in model:
            model[key] = []

        model[key].append(next_word)

    return model

In [16]:
def generate_text(model, seed=None, length=50):
    if seed is None:
        seed = random.choice(list(model.keys()))

    current_key = seed
    output = list(seed)

    for _ in range(length):
        next_word = random.choice(model[current_key])
        output.append(next_word)
        current_key = tuple(output[-len(seed):])

    return ' '.join(output)

In [17]:
if __name__ == '__main__':
    # Download a public domain book from Project Gutenberg
    book_url = 'https://www.gutenberg.org/cache/epub/11/pg11.txt' # Alice's Adventures in Wonderland by Lewis Carroll
    raw_text = download_book(book_url)
    text = preprocess_text(raw_text)

    # Train the Markov Chains model
    order = 2
    markov_model = train_markov_chain(text, order)

    # Generate new text using the trained model
    seed = ('Alice', 'was')
    generated_text = generate_text(markov_model, seed, length=100)
    print(generated_text)

Alice was so small as thisbefore, never! And I declare its too bad, that it was addressed to thebaby, and not to her, still it had finished this short speech, they allcheered.Alice thought the poor little feet, Iwonder who will put on her lap as if he thought it over yes, thats about the Mission of Project Gutenbergtm work. The Foundation makes norepresentations concerning the copyright holder, your use and distributionmust comply with paragraph 1.F.3, the ProjectGutenberg Literary Archive Foundation is a registered trademark. It may only beused on or associated in any way with the lobsters, out to sea!But the snail
