In [None]:
"""
02_debug_syllabifier.py

Notebook/script for debugging:
- normalization
- pāda splitting
- syllable segmentation (akṣaras)
- L/G and gaṇa computation

You can plug in arbitrary mantras from the cleaned CSVs and
visually inspect the akṣara tables.
"""

In [None]:
import os

import pandas as pd
from tabulate import tabulate

from src.normalization import normalize_text
from src.pada_sandhi import split_padas
from src.syllabifier import syllabify_line

In [None]:
# Locate the project root, i.e. the directory that holds data/raw.
# `__file__` only exists when this code runs as a plain script; inside a
# Jupyter kernel it is undefined and referencing it raises NameError.
try:
    # Script mode: the project root is taken to be the parent of this file's
    # directory. abspath() guards against a relative __file__, for which two
    # dirname() calls would collapse to "" instead of climbing one level.
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
except NameError:
    # Notebook mode: climb from the working directory until a folder that
    # contains data/raw is found; if none is found, stay at the working
    # directory so the resulting error message shows a sensible path.
    BASE_DIR = os.getcwd()
    _probe = BASE_DIR
    while not os.path.isdir(os.path.join(_probe, "data", "raw")):
        _parent = os.path.dirname(_probe)
        if _parent == _probe:  # reached the filesystem root without a match
            _probe = BASE_DIR
            break
        _probe = _parent
    BASE_DIR = _probe

DATA_DIR = os.path.join(BASE_DIR, "data", "raw")
rig_path = os.path.join(DATA_DIR, "only_Rigveda.csv")

# Load the Rigveda table used by the cells below.
rig = pd.read_csv(rig_path)

print("Rigveda rows:", len(rig))

In [None]:
# Helper: dump one pāda as an akṣara-by-akṣara table
def debug_pada(pada_text: str):
    """Print a syllable-level breakdown of a single pāda.

    The text is passed through ``normalize_text`` with ``strip_svaras=True``
    and the result is handed to ``syllabify_line``, which yields the akṣaras,
    the L/G pattern and the gaṇas. The function then prints, in order: the
    original text, the normalized text, a ``psql``-style table with one row
    per akṣara, the L/G pattern, and the gaṇas joined by hyphens.

    Args:
        pada_text: Raw text of the pāda to inspect.

    Returns:
        None. All output goes to stdout.
    """
    normalized = normalize_text(pada_text, strip_svaras=True)
    syllables, lg_pattern, gana_names = syllabify_line(normalized)

    # One row per akṣara, numbered from 1 for readability.
    table_rows = [
        [
            position,
            syl.text,
            syl.vowel,
            syl.coda,
            syl.prosodic_matra,
            syl.L_or_G(),
            syl.guru_reason,
        ]
        for position, syl in enumerate(syllables, start=1)
    ]
    column_names = ["#", "akṣara", "vowel", "coda", "mātrā", "L/G", "guru_reason"]

    print(f"\nPāda text: {pada_text}")
    print("Normalized:", normalized)
    print(tabulate(table_rows, headers=column_names, tablefmt="psql"))
    print("L/G pattern:", lg_pattern)
    print("Gaṇas:", "-".join(gana_names))


In [None]:
# Example: RV 1.1.1 — take the first row whose Mandal, Sukta and
# Mantra Number columns are all equal to 1.
rv_111 = rig.loc[
    rig["Mandal"].eq(1) & rig["Sukta"].eq(1) & rig["Mantra Number"].eq(1)
].iloc[0]

print("=== RV 1.1.1 MantraText ===")
print(rv_111["MantraText"])

In [None]:
# Run the full RV 1.1.1 mantra text through normalization (svaras stripped)
# and then through the pāda splitter, printing each intermediate result.
text = rv_111["MantraText"]
norm = normalize_text(text, strip_svaras=True)
print("Normalized with dandas:", norm)

# split_padas is iterated below; each item exposes `.index` and `.text`.
padas = split_padas(norm)
print("\nSplit pādas:")
for p in padas:
    # `p.index` is presumably 0-based, hence the +1 for display — confirm
    # against src.pada_sandhi.
    print(f"{p.index+1}: {p.text}")

In [None]:
# Manual pāda segmentation: when you want to control the split yourself
# (e.g., 3 pādas for Gāyatrī), list the pāda strings here by hand.
# Each entry is passed to debug_pada as-is, one table per pāda.
custom_padas = [
    "अग्निमीळे पुरोहितं",
    "यज्ञस्य देवमृत्विजम्",
    "होतारं रत्नधातमम्",
]
for ptxt in custom_padas:
    debug_pada(ptxt)


In [None]:
# Spot-check a few random Rigveda mantras to see whether the syllable
# counts look reasonable. The fixed random_state keeps the selection
# reproducible across runs.
# DataFrame.sample raises ValueError when asked for more rows than the frame
# holds (sampling without replacement), so cap the size for small inputs.
sample = rig.sample(min(5, len(rig)), random_state=42)

for _, row in sample.iterrows():
    print("\n==========")
    m_id = f"RV-{row['Mandal']}.{row['Sukta']}.{row['Mantra Number']}"
    print("ID:", m_id)
    print("Chanda:", row["Chanda"])
    print("Text:", row["MantraText"])
    # Normalize the whole mantra first so the splitter sees cleaned text.
    norm = normalize_text(row["MantraText"], strip_svaras=True)
    print("Normalized:", norm)
    padas = split_padas(norm)
    # One akṣara table per pāda produced by the splitter.
    for p in padas:
        debug_pada(p.text)