# In [1]:
import random
import re

class MarkovGenerator:
    """Word-level Markov chain text generator.

    The model maps each *state* (a tuple of ``order`` consecutive tokens) to
    the list of tokens that followed that state in the training text.
    Followers are stored with repetition, so picking uniformly from the list
    samples in proportion to observed frequency.

    Attributes:
        order: Number of preceding tokens that make up a state.
        graph: Dict mapping state tuples to lists of follower tokens.
    """

    # Shared by training and by string seeds so both are normalized alike:
    # runs of word characters/apostrophes, or a single punctuation mark.
    _TOKEN_PATTERN = r"[\w']+|[.,!?;]"

    def __init__(self, order=2):
        """Create an untrained generator.

        Args:
            order: Number of tokens per state. ``0`` yields a model in which
                every token is drawn independently of its predecessors.

        Raises:
            ValueError: If ``order`` is negative.
        """
        if order < 0:
            raise ValueError("order must be a non-negative integer")
        self.order = order
        self.graph = {}

    def _tokenize(self, text):
        """Lower-case ``text`` and split it into word and punctuation tokens."""
        return re.findall(self._TOKEN_PATTERN, text.lower())

    def _resolve_seed(self, seed):
        """Return ``seed`` as a known state tuple, or ``None`` if unusable.

        Strings are tokenized the same way as training text; any other
        iterable is converted to a tuple as-is. Seeds that are not iterable,
        contain unhashable items, or are not present in the graph yield
        ``None`` so the caller can fall back to a random start.
        """
        if seed is None:
            return None
        if isinstance(seed, str):
            candidate = tuple(self._tokenize(seed))
        else:
            try:
                candidate = tuple(seed)
            except TypeError:
                return None
        try:
            return candidate if candidate in self.graph else None
        except TypeError:
            # An element of the seed is unhashable, so it cannot be a key.
            return None

    def train(self, text):
        """Add the transitions found in ``text`` to the model.

        May be called repeatedly; transitions accumulate across calls.

        Args:
            text: Training text. It is lower-cased before tokenization.
        """
        words = self._tokenize(text)

        for i in range(len(words) - self.order):
            # The state is the window of `order` tokens starting at i; the
            # token right after the window is recorded as its follower.
            state = tuple(words[i : i + self.order])
            self.graph.setdefault(state, []).append(words[i + self.order])

    def generate(self, length=50, seed=None):
        """Generate up to ``length`` tokens joined by single spaces.

        Args:
            length: Maximum number of tokens in the result. Generation stops
                early when the current state has no recorded follower.
            seed: Optional starting state: a sequence of ``order`` tokens or
                a string (tokenized like training text). If it is missing or
                not a known state, a random known state is used instead.

        Returns:
            The generated text, or ``"Model not trained yet!"`` if the model
            has no transitions. Punctuation tokens are space-separated like
            any other token.
        """
        if not self.graph:
            return "Model not trained yet!"

        state = self._resolve_seed(seed)
        if state is None:
            state = random.choice(list(self.graph))
        output = list(state)

        for _ in range(length - self.order):
            followers = self.graph.get(state)
            if not followers:
                break  # Dead end: this state was never followed by anything.
            output.append(random.choice(followers))
            # Slide the window forward. An explicit start index is used
            # because output[-0:] would be the whole list when order == 0.
            state = tuple(output[len(output) - self.order :])

        # The seed alone may already exceed the requested length.
        return " ".join(output[: max(length, 0)])

# --- Quick usage: train on a tiny corpus and print one generated sample ---
if __name__ == "__main__":
    sample_text = (
        "I like green eggs and ham. "
        "I do not like them, Sam-I-am. "
        "I do not like green eggs and ham."
    )

    # order=1: each state is a single token.
    m = MarkovGenerator(order=1)
    m.train(sample_text)

    # Request at most 10 tokens; the starting state is picked at random,
    # so the printed text differs between runs.
    print(m.generate(10))

# Example output (random; varies between runs): not like them , sam i am . i am
