In [31]:
import random
from collections import defaultdict
import re

class MarkovTextGenerator:
    def __init__(self, order=3):
        """
        Initialize Markov chain generator.
        order: number of characters to consider for each state (n-gram size)
        """
        self.order = order
        self.model = defaultdict(list)
    
    def train(self, text):
        """
        Train the model on input text by building transition probabilities.
        """
        # Clean text: convert to lowercase and remove special characters
        text = re.sub(r'[^a-zA-Z\s.,!?]', '', text.lower())
        
        # Build the Markov chain
        for i in range(len(text) - self.order):
            state = text[i:i + self.order]
            next_char = text[i + self.order]
            self.model[state].append(next_char)
    
    def generate(self, length=100, seed=None):
        """
        Generate text of specified length.
        seed: starting sequence (must be of length order)
        """
        if not self.model:
            return "Model not trained. Please train with text first."
        
        # Choose random seed if none provided
        if seed is None:
            seed = random.choice(list(self.model.keys()))
        
        # Ensure seed is valid
        if len(seed) != self.order or seed not in self.model:
            seed = random.choice(list(self.model.keys()))
        
        result = seed
        
        for _ in range(length - self.order):
            current_state = result[-self.order:]
            if current_state not in self.model:
                # If state not found, pick a random next character
                next_char = random.choice('abcdefghijklmnopqrstuvwxyz ')
            else:
                next_char = random.choice(self.model[current_state])
            result += next_char
        
        return result

# Example usage
if __name__ == "__main__":
    # Sample text for training
    sample_text = """
    The quick brown fox jumps over the lazy dog. 
    The dog sleeps while the fox runs swiftly.
    """
    
    # Create and train the generator
    generator = MarkovTextGenerator(order=3)
    generator.train(sample_text)
    
    # Generate text
    generated_text = generator.generate(length=100)
    print("Generated text:")
    print(generated_text)

Generated text:
rown fox jumps over the dog. 
   the quick brown fox runs swiftly.
   the lazy dog sleeps while the 


In [32]:
import random

def build_markov_chain(text, n=1):
    """
    Build a Markov chain model from the input text.

    Parameters:
    - text: str - input training text
    - n: int - order of the Markov chain (default 1)

    Returns:
    - dict: a dictionary representing the Markov chain
    """
    words = text.split()
    markov_chain = {}

    for i in range(len(words) - n):
        key = tuple(words[i:i + n])
        next_word = words[i + n]
        
        if key not in markov_chain:
            markov_chain[key] = []
        markov_chain[key].append(next_word)

    return markov_chain

def generate_text(chain, n=1, max_words=50):
    """
    Generate text using the Markov chain.

    Parameters:
    - chain: dict - the Markov chain
    - n: int - order of the Markov chain
    - max_words: int - maximum number of words to generate

    Returns:
    - str: generated text
    """
    # Randomly pick a starting key
    start_key = random.choice(list(chain.keys()))
    generated_words = list(start_key)

    for _ in range(max_words - n):
        current_key = tuple(generated_words[-n:])
        next_words = chain.get(current_key)

        if not next_words:
            break  # Stop if there are no next words
        next_word = random.choice(next_words)
        generated_words.append(next_word)

    return ' '.join(generated_words)

# Example usage
sample_text = """
Once upon a time there was a brave princess who lived in a castle. 
She had a dragon as her best friend and they went on many adventures together.
"""

# Build the Markov chain with order 1
chain = build_markov_chain(sample_text, n=1)

# Generate text
generated = generate_text(chain, n=1, max_words=30)
print("Generated Text:\n", generated)


Generated Text:
 in a time there was a castle. She had a time there was a brave princess who lived in a dragon as her best friend and they went on many


In [43]:
import random
import re
from collections import defaultdict
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

class CreativeMarkovGenerator:
    def __init__(self, max_order=3):
        self.max_order = max_order
        self.models = {}  # Multiple models for different orders
        self.vocab = set()
        self.vectorizer = None
        self.word_vectors = None

    def train(self, text):
        # Preprocess text
        sentences = re.split(r'[.!?]\s*', text)
        sentences = [re.sub(r'[^\w\s]', '', s.lower()) for s in sentences if s.strip()]

        # Train models for different orders and build vocabulary
        for order in range(1, self.max_order + 1):
            self.models[order] = defaultdict(dict)
            for sentence in sentences:
                words = sentence.split()
                if len(words) < order:
                    continue
                padded = ['[START]'] * order + words + ['[END]']
                for i in range(len(padded) - order):
                    state = tuple(padded[i:i+order])
                    next_word = padded[i+order]
                    self.models[order][state][next_word] = self.models[order][state].get(next_word, 0) + 1
                    self.vocab.add(next_word)

        # Build semantic model after vocabulary is filled
        self._build_semantic_model(sentences)

    def _build_semantic_model(self, sentences):
        # Semantic similarity model based on word co-occurrence
        self.vectorizer = CountVectorizer()
        self.vectorizer.fit(list(self.vocab))
        self.word_vectors = self.vectorizer.transform(list(self.vocab))

    def _similar_words(self, word, n=3):
        """Find semantically similar words"""
        if word not in self.vectorizer.vocabulary_:
            return [word]

        vec = self.vectorizer.transform([word])
        if vec.shape[0] == 0 or vec.shape[1] == 0:
            return [word]

        sims = cosine_similarity(vec, self.word_vectors)
        top_indices = np.argsort(sims[0])[-n-1:-1][::-1]
        return [list(self.vocab)[i] for i in top_indices if i < len(self.vocab)]

    def generate(self, temperature=0.8, max_length=15):
        if not self.models:
            return "No model trained"

        current_order = random.randint(1, self.max_order)
        state = random.choice([
            s for s in self.models[current_order].keys()
            if s[0] == '[START]'
        ])

        output = list(state)

        for _ in range(max_length):
            if random.random() > 0.7:
                current_order = random.randint(1, self.max_order)
                state = tuple(output[-current_order:]) if len(output) >= current_order else state[:current_order]

            if state not in self.models[current_order]:
                current_order = max(1, current_order - 1)
                state = tuple(output[-current_order:]) if len(output) >= current_order else state[:current_order]
                if state not in self.models[current_order]:
                    break

            candidates = list(self.models[current_order][state].keys())
            counts = list(self.models[current_order][state].values())

            if temperature > 0.5 and random.random() > 0.6 and candidates:
                candidates.extend(self._similar_words(random.choice(candidates)))
                counts.extend([1] * 3)

            weights = [count**(1/temperature) for count in counts]
            next_word = random.choices(candidates, weights=weights, k=1)[0]

            if next_word == '[END]':
                break

            output.append(next_word)
            state = tuple(output[-current_order:])

            # Occasionally combine with another phrase
            if random.random() > 0.8 and len(output) < max_length / 2:
                continuation = self.generate(
                    temperature=min(1.5, temperature + 0.3),
                    max_length=max_length // 2
                )
                output.extend(continuation.lower().split())
                break

        result = ' '.join([w for w in output if w != '[START]']).capitalize()
        if not any(result.endswith(p) for p in '.!?'):
            result += random.choice(['.', '!', '...'])

        return result

# Sample training text
training_text = """
Quick foxes leap over fences while brown dogs chase their tails. 
Lazy cats nap in warm sunlight as eager puppies jump playfully. 
Ancient trees whisper secrets through rustling leaves. 
Hungry wolves hunt under moonlit skies. 
Busy bees gather golden pollen from fragrant flowers. 
Tiny ants carry massive leaves across the forest floor. 
Clever ravens solve complex puzzles with surprising intelligence. 
Swift deer escape lurking predators through dense thickets. 
Massive whales sing haunting melodies in ocean depths. 
Colorful parrots mimic human speech with uncanny accuracy. 
Curious raccoons open secured containers using nimble paws. 
Playful dolphins surf rolling waves near sunny shores. 
Majestic eagles soar above mountain peaks with effortless grace.
"""

# Generate samples
generator = CreativeMarkovGenerator(max_order=3)
generator.train(training_text)

print("Creative text generations:")
for i in range(10):
    temp = 0.7 + random.random() * 0.6  # Vary temperature
    print(f"{i+1}. {generator.generate(temperature=temp)}")


Creative text generations:
1. !
2. Colorful parrots mimic tiny ants carry mountain peaks mountain swift.
3. Raccoons open carry.
4. Clever quick foxes carry massive whales sing haunting!
5. Lazy cats swift.
6. Clever ravens solve complex swift swift massive leaves carry!
7. Ancient trees whisper secrets through rustling leaves!
8. Tiny ants brown dogs colorful parrots mimic human speech with uncanny accuracy.
9. Tiny ...
10. Massive whales curious raccoons mountain peaks with uncanny accuracy!
