In [7]:
import json
from collections import Counter
import pandas as pd

def summarize_chanda_and_padas(json_path):
    """Tabulate chanda labels and pada counts for the slokas in a JSON file.

    The file is expected to hold a JSON array of objects; each object is
    queried for the keys ``"chanda"`` and ``"padas"``.

    Args:
        json_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A ``(df_chanda, df_padas)`` tuple of DataFrames:

        * ``df_chanda`` has columns ``"Chanda"`` and ``"Count"``, ordered by
          descending count. Labels with equal counts keep the order in which
          they were first seen, so the result is deterministic.
        * ``df_padas`` has columns ``"Number of Padas"`` and
          ``"Number of Slokas"``, ordered by ascending number of padas.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        slokas = json.load(f)

    chanda_counter = Counter()
    padas_counter = Counter()

    for sloka in slokas:
        chanda = sloka.get("chanda")
        # Missing, empty, or container-valued (list/dict) labels are all
        # bucketed under the literal string "None"; containers are also
        # unhashable and could not be used as Counter keys anyway.
        if not chanda or isinstance(chanda, (list, dict)):
            chanda = "None"
        # Any label that embeds the upstream IndexError message is folded
        # into a single error bucket.
        elif "list index out of range" in str(chanda):
            chanda = "Error: list index out of range"
        chanda_counter[chanda] += 1

        # `or []` also covers an explicit JSON null, where .get() would
        # return None despite a default and len(None) would raise.
        padas_counter[len(sloka.get("padas") or [])] += 1

    # most_common() yields (label, count) pairs by descending count with
    # first-seen order for ties; the fresh DataFrame gets a 0..n-1 index.
    df_chanda = pd.DataFrame(
        chanda_counter.most_common(),
        columns=["Chanda", "Count"],
    )
    # Pada counts are unique Counter keys, so sorting the pairs orders the
    # rows by number of padas.
    df_padas = pd.DataFrame(
        sorted(padas_counter.items()),
        columns=["Number of Padas", "Number of Slokas"],
    )

    return df_chanda, df_padas

# Example usage: build both summary tables from the annotated sloka file.
df_chanda, df_padas = summarize_chanda_and_padas("slokas_with_chanda.json")

# Print each heading, then render its table: first the full chanda
# frequency table, then the distribution of slokas by number of padas.
for heading, frame in (
    ("=== Chanda Summary ===", df_chanda),
    ("\n=== Sloka Count by Number of Padas ===", df_padas),
):
    print(heading)
    display(frame)


=== Chanda Summary ===


Unnamed: 0,Chanda,Count
0,अनुष्टुभ्,17985
1,Error: list index out of range,556
2,वंशस्थ / वंशस्थविल / वंशस्तनित,314
3,उपेन्द्रवज्रा,239
4,,198
5,इन्द्रवज्रा,170
6,अनुष्टुभ् / वंशस्थ / वंशस्थविल / वंशस्तनित,53
7,अनुष्टुभ् / उपेन्द्रवज्रा,34
8,इन्द्रवज्रा / उपेन्द्रवज्रा,28
9,औपच्छन्दसिक / पुष्पिताग्रा,25



=== Sloka Count by Number of Padas ===


Unnamed: 0,Number of Padas,Number of Slokas
0,1,198
1,2,19503
