In [3]:
import random

# ==========================
# Character-based Markov Chain
# ==========================

def build_char_markov_chain(text):
    """Collect, for every character in ``text``, the characters seen right after it.

    Args:
        text: Training string to scan.

    Returns:
        dict: Maps a character to the list of characters that immediately
        followed it, in order of appearance. Repeats are kept, so a successor
        appears once per occurrence in ``text``. Empty and one-character
        inputs yield an empty dict.
    """
    transitions = {}
    # Pair each character with its right-hand neighbour.
    for head, follower in zip(text, text[1:]):
        transitions.setdefault(head, []).append(follower)
    return transitions

def generate_char_text(chain, length=200):
    """Generate text by walking a character-level Markov chain.

    Args:
        chain: Mapping of character -> list of possible next characters,
            as produced by ``build_char_markov_chain``.
        length: Maximum number of characters to produce.

    Returns:
        str: Generated text of at most ``length`` characters. It is shorter
        when the walk reaches a character with no recorded successors, and
        empty when ``chain`` is empty or ``length`` is not positive.
    """
    # Nothing to sample from, or nothing requested: return an empty result
    # rather than failing inside random.choice or emitting a stray character.
    if not chain or length <= 0:
        return ''

    current_char = random.choice(list(chain.keys()))
    result = [current_char]
    for _ in range(length - 1):
        next_chars = chain.get(current_char)
        if not next_chars:
            # Dead end: this character never had a successor in the training text.
            break
        current_char = random.choice(next_chars)
        result.append(current_char)
    return ''.join(result)

# ==========================
# Word-based Markov Chain
# ==========================

def build_word_markov_chain(text):
    """Collect, for every word in ``text``, the words seen right after it.

    Words are obtained with ``str.split()``, i.e. split on runs of whitespace;
    punctuation stays attached to the word it touches.

    Args:
        text: Training string to scan.

    Returns:
        dict: Maps a word to the list of words that immediately followed it,
        in order of appearance, with repeats kept. Inputs with fewer than two
        words yield an empty dict.
    """
    tokens = text.split()
    transitions = {}
    # Pair each word with the word that follows it.
    for head, follower in zip(tokens, tokens[1:]):
        transitions.setdefault(head, []).append(follower)
    return transitions

def generate_word_text(chain, length=50):
    """Generate text by walking a word-level Markov chain.

    Args:
        chain: Mapping of word -> list of possible next words, as produced
            by ``build_word_markov_chain``.
        length: Maximum number of words to produce.

    Returns:
        str: Generated words joined by single spaces, at most ``length`` of
        them. It is shorter when the walk reaches a word with no recorded
        successors, and empty when ``chain`` is empty or ``length`` is not
        positive.
    """
    # Nothing to sample from, or nothing requested: return an empty result
    # rather than failing inside random.choice or emitting a stray word.
    if not chain or length <= 0:
        return ''

    current_word = random.choice(list(chain.keys()))
    result = [current_word]
    for _ in range(length - 1):
        next_words = chain.get(current_word)
        if not next_words:
            # Dead end: this word never had a successor in the training text.
            break
        current_word = random.choice(next_words)
        result.append(current_word)
    return ' '.join(result)

# ==========================
# Sample Text Input
# ==========================

sample_text = """
Markov chains are a fascinating concept in probability theory.
They are used in many applications including text generation, weather prediction, and game theory.
"""

# Character-level model: learn single-character transitions from the sample,
# then sample up to 300 characters from them.
char_chain = build_char_markov_chain(sample_text)
char_output = generate_char_text(char_chain, length=300)

# Word-level model: learn word-to-word transitions from the same sample,
# then sample up to 50 words from them.
word_chain = build_word_markov_chain(sample_text)
word_output = generate_word_text(word_chain, length=50)

# ==========================
# Display Outputs
# ==========================
# Each heading already ends with "\n"; the "\n" separator adds the blank line
# between the heading and the generated text.
print("🔡 Character-Based Text Generation:\n", char_output, sep="\n")
print("\n🧠 Word-Based Text Generation:\n", word_output, sep="\n")


🔡 Character-Based Text Generation:

ry.
The g g udinge plud cichedions 
Thate 
Theon, athatilin, iconse ior meonclineongeabaraty.
Theathe aplin, prarexteonched we usense amextincthey.
Main, theatin amatheron ame cen tingas tion, ating citioned teatheons prkon, cincext tin, coban thed fance areary ar atitheplus charery me ge iluseorov 

🧠 Word-Based Text Generation:

in many applications including text generation, weather prediction, and game theory. They are a fascinating concept in many applications including text generation, weather prediction, and game theory. They are used in probability theory. They are used in many applications including text generation, weather prediction, and game theory. They are a


In [4]:
# 📥 Step 1: Upload a .txt file
from google.colab import files

print("📂 Upload a .txt file with training text...")
uploaded = files.upload()

# Fail with a clear message when nothing was uploaded (e.g. the dialog was
# dismissed) instead of an IndexError from indexing an empty list.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a .txt file.")

# Only the first uploaded file is used as training text.
filename = next(iter(uploaded))

# Decode the bytes returned by files.upload() rather than re-opening the file
# by name: Colab may save the upload under a de-duplicated name such as
# "name (1).txt", so the dictionary key is not guaranteed to point at the
# freshly uploaded content on disk.
input_text = uploaded[filename].decode('utf-8')
# Normalise line endings the same way text-mode open() would.
input_text = input_text.replace('\r\n', '\n').replace('\r', '\n')

# 🧠 Step 2: Build Character N-Gram Markov Chain
import random

def build_char_ngram_chain(text, n=2):
    """Collect, for every ``n``-character window in ``text``, the character after it.

    Args:
        text: Training string to scan.
        n: Number of characters in each window (the key length).

    Returns:
        dict: Maps each ``n``-character substring to the list of characters
        that immediately followed it, in order of appearance, with repeats
        kept. A text of ``n`` characters or fewer yields an empty dict.
    """
    table = {}
    # ``end`` is the index of the character that follows the window text[end-n:end].
    for end in range(n, len(text)):
        table.setdefault(text[end - n:end], []).append(text[end])
    return table

def generate_char_ngram_text(chain, n=2, length=500):
    """Generate text by walking an n-gram character-level Markov chain.

    Args:
        chain: Mapping of ``n``-character string -> list of possible next
            characters, as produced by ``build_char_ngram_chain``.
        n: Window size; should match the key length used to build ``chain``.
        length: Maximum number of characters to produce.

    Returns:
        str: Generated text of at most ``length`` characters. It is shorter
        when the walk reaches a window with no recorded successors, and empty
        when ``chain`` is empty or ``length`` is not positive.
    """
    # Nothing to sample from, or nothing requested: return an empty result
    # rather than failing inside random.choice.
    if not chain or length <= 0:
        return ''

    # Seed the output with a randomly chosen window from the chain.
    current = random.choice(list(chain.keys()))
    result = current
    for _ in range(length - n):
        next_chars = chain.get(current)
        if not next_chars:
            # Dead end: this window never had a successor in the training text.
            break
        next_char = random.choice(next_chars)
        result += next_char
        # Slide the window forward to the last n characters generated so far.
        current = result[-n:]
    # The seed alone can be longer than ``length`` (e.g. length < n), so cap
    # the output at the requested size.
    return result[:length]

# ⚙️ Step 3: Generate Text
n = 2  # Window size in characters; 3 or higher tends to give more readable output
length = 500  # Number of characters to generate

char_chain = build_char_ngram_chain(input_text, n=n)
generated_text = generate_char_ngram_text(char_chain, n=n, length=length)

# The heading ends with "\n"; the "\n" separator adds the blank line before the text.
print(f"\n✅ 🔡 Generated Text (n-gram = {n}):\n", generated_text, sep="\n")

# 💾 Step 4: Save and Download Result
# Write the generated text to a local file, then ask Colab to download it.
output_filename = "generated_output.txt"
with open(output_filename, mode="w", encoding="utf-8") as f:
    f.write(generated_text)

files.download(output_filename)


📂 Upload a .txt file with training text...


Saving training_data.txt to training_data (1).txt

✅ 🔡 Generated Text (n-gram = 2):

zard. The fame, a gretim — untireame alivend th the mand that bet jus, Aribbled said, "The a bet villagen gaven name, not med tiong, wrietil th ideamour storin’s even glowitset storin but vis stled Aring, boy evenin dre noticed the ne villagen of lies startebook.

The pre didn't justo clage ne some, "The sook.

The belp your jus, win. The flyings.


The a quiest aliven old Arithatch in gred to becamet


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>