<a href="https://colab.research.google.com/github/Vageeswari-kanchiuniv/C/blob/main/sanskrit_word_split_rule_based.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**🔹 Python Code Example**

In [1]:
import re

# Basic Sandhi rules for splitting
sandhi_rules = [
    # Rule: 'o' before apostrophe (ऽ) often from "ः + अ"
    (r"ोऽ", "ः + अ"),
    # Rule: 'ो' at end can be "ः"
    (r"ो ", "ः "),
    # Rule: 'ः' + vowel
    (r"ः([अआइईउऊएऐओऔ])", r"ः + \1"),
    # Rule: "ार्" before "ज" could be "ा + अर्ज"
    (r"ार्ज", "ा + अर्ज"),
    # Rule: consonant cluster simplification (simple demo)
    (r"वद", " + वद"),
]

def sandhi_split(word):
    result = word
    for pattern, repl in sandhi_rules:
        result = re.sub(pattern, repl, result)
    return result

# Examples
examples = [
    "रामोऽस्ति",   # रामः + अस्ति
    "गुरुपदेशात्", # गुरु: + उपदेशात्
    "शिवोऽहम्",   # शिवः + अहम्
    "विद्याऽर्जनम्", # विद्या + अर्जनम्
    "सत्यंवद"     # सत्यं + वद
]

for w in examples:
    print(f"{w}  →  {sandhi_split(w)}")


रामोऽस्ति  →  रामः + अस्ति
गुरुपदेशात्  →  गुरुपदेशात्
शिवोऽहम्  →  शिवः + अहम्
विद्याऽर्जनम्  →  विद्याऽर्जनम्
सत्यंवद  →  सत्यं + वद


**Python Code – Sandhi-Vicheda Engine**

In [1]:
import re

# ---------------------------
# Simple Sanskrit Dictionary (expand as needed)
# ---------------------------
dictionary = {
    "रामः", "अस्ति", "गुरुः", "उपदेशात्", "शिवः", "अहम्",
    "विद्या", "अर्जनम्", "सत्यं", "वद", "लोकः", "जनः"
}

# ---------------------------
# Sandhi Rules (Reversal Patterns)
# Each rule is (pattern, replacement, description)
# ---------------------------
sandhi_rules = [
    # Visarga Sandhi
    (r"ोऽ", "ः + अ", "Visarga + vowel (oऽ = ः + अ)"),
    (r"ो ", "ः ", "Visarga at end"),
    (r"ः([अआइईउऊएऐओऔ])", r"ः + \1", "Visarga + vowel"),

    # Vowel Sandhi
    (r"आअ", "आ + अ", "Long A before A"),
    (r"एअ", "ए + अ", "E before A"),
    (r"ओअ", "ओ + अ", "O before A"),
    (r"ाऽ", "ा + अ", "ā + a (vidyāऽrjanam)"),

    # Consonant Sandhi (basic demo)
    (r"ंव", "ं + व", "Anusvāra before v"),
    (r"ंय", "ं + य", "Anusvāra before y"),
    (r"ंर", "ं + र", "Anusvāra before r"),
]

# ---------------------------
# Apply Sandhi Rules
# ---------------------------
def apply_rules(word):
    """Generate possible splits by applying sandhi rules"""
    splits = []
    for pattern, repl, desc in sandhi_rules:
        if re.search(pattern, word):
            candidate = re.sub(pattern, repl, word)
            splits.append((candidate, desc))
    return splits

# ---------------------------
# Validate against dictionary
# ---------------------------
def validate_split(split_text):
    """Check if split words exist in dictionary"""
    words = [w.strip() for w in split_text.split("+")]
    return all(word in dictionary for word in words)

# ---------------------------
# Main Sandhi-Vicheda function
# ---------------------------
def sandhi_vicheda(word):
    results = []
    candidates = apply_rules(word)

    for cand, rule in candidates:
        if validate_split(cand):
            results.append((cand, rule))

    # If nothing validated, return raw candidates
    if not results:
        return [(cand, rule) for cand, rule in candidates]

    return results

# ---------------------------
# Test Examples
# ---------------------------
examples = [
    "रामोऽस्ति",    # रामः + अस्ति
    "गुरुपदेशात्", # गुरुः + उपदेशात्
    "शिवोऽहम्",    # शिवः + अहम्
    "विद्याऽर्जनम्", # विद्या + अर्जनम्
    "सत्यंवद"     # सत्यं + वद
]

for word in examples:
    print(f"\nWord: {word}")
    results = sandhi_vicheda(word)
    if results:
        for res, rule in results:
            print(f"  → {res}   (Rule: {rule})")
    else:
        print("  → No split found")



Word: रामोऽस्ति
  → रामः + अस्ति   (Rule: Visarga + vowel (oऽ = ः + अ))

Word: गुरुपदेशात्
  → No split found

Word: शिवोऽहम्
  → शिवः + अहम्   (Rule: Visarga + vowel (oऽ = ः + अ))

Word: विद्याऽर्जनम्
  → विद्या + अर्जनम्   (Rule: ā + a (vidyāऽrjanam))

Word: सत्यंवद
  → सत्यं + वद   (Rule: Anusvāra before v)


**🔹 Recursive Sandhi-Vicheda Engine (Python)**

In [4]:
import re

# ---------------------------
# Simple Sanskrit Dictionary (expand as needed)
# ---------------------------
dictionary = {
    "रामः", "अस्ति", "गुरुः", "उपदेशात्", "उपदेशः", "शिवः", "अहम्",
    "विद्या", "अर्जनम्", "सत्यं", "वद", "लोकः", "जनः", "ज्ञानम्"
}

# ---------------------------
# Sandhi Rules (Reversal Patterns)
# ---------------------------
sandhi_rules = [
    # Visarga Sandhi
    (r"ोऽ", "ः + अ"),
    (r"ो", "ः"),
    (r"ः([अआइईउऊएऐओऔ])", r"ः + \1"),

    # Vowel Sandhi
    (r"ाऽ", "ा + अ"),
    (r"आअ", "आ + अ"),
    (r"एअ", "ए + अ"),
    (r"ओअ", "ओ + अ"),

    # Anusvara Sandhi
    (r"ं([वयरल])", r"ं + \1"),
]

# ---------------------------
# Recursive splitter
# ---------------------------
def recursive_split(word, depth=0):
    """Try to recursively split a word into dictionary words."""
    results = []

    # Base case: if the whole word is in the dictionary
    if word in dictionary:
        return [[word]]

    # Try applying all sandhi rules
    for pattern, repl in sandhi_rules:
        if re.search(pattern, word):
            candidate = re.sub(pattern, repl, word, count=1)

            # Only split if we actually got a "+" in the result
            if "+" not in candidate:
                continue

            parts = [w.strip() for w in candidate.split("+", 1)]
            if len(parts) != 2:
                continue

            left_options = recursive_split(parts[0], depth+1)
            right_options = recursive_split(parts[1], depth+1)

            for left in left_options:
                for right in right_options:
                    results.append(left + right)

    return results

# ---------------------------
# Wrapper function
# ---------------------------
def sandhi_vicheda(word):
    splits = recursive_split(word)
    # Remove duplicates
    unique_splits = []
    for s in splits:
        if s not in unique_splits:
            unique_splits.append(s)
    return unique_splits

# ---------------------------
# Test Examples
# ---------------------------
examples = [
    "रामोऽस्ति",      # रामः + अस्ति
    "गुरुपदेशात्",   # गुरुः + उपदेशात्
    "शिवोऽहम्",      # शिवः + अहम्
    "विद्याऽर्जनम्", # विद्या + अर्जनम्
    "सत्यंवद",       # सत्यं + वद
    "लोकजनः"        # लोकः + जनः
]

for word in examples:
    print(f"\nWord: {word}")
    results = sandhi_vicheda(word)
    if results:
        for r in results:
            print("  →", " + ".join(r))
    else:
        print("  → No split found")



Word: रामोऽस्ति
  → रामः + अस्ति

Word: गुरुपदेशात्
  → No split found

Word: शिवोऽहम्
  → शिवः + अहम्

Word: विद्याऽर्जनम्
  → विद्या + अर्जनम्

Word: सत्यंवद
  → सत्यं + वद

Word: लोकजनः
  → No split found
