# Metrical Analysis of Sanskrit Ninth Class Verb Forms

## Getting Verbal Roots 

In [None]:
# Download the accented scan of Whitney's "Roots" from GRETIL into data/.
!wget --output-document=data/whitney_roots.pdf http://gretil.sub.uni-goettingen.de/gretil_elib/Whi885__Whitney_Roots-ACCENTED.pdf

In [4]:
# Requires pdftk; install it first if missing (e.g. on Ubuntu: sudo apt install pdftk).
# Copies page 229 of the downloaded PDF into its own file.
!pdftk data/whitney_roots.pdf cat 229 output data/whitney_roots_ninth_class.pdf

In [5]:
# Converts the extracted page to plain text. No output path is given, so the
# result is written next to the input as data/whitney_roots_ninth_class.txt
!pdftotext data/whitney_roots_ninth_class.pdf

Clean up the text version manually, fixing formatting and diacritics.

Final results are in [data/whitney_roots_ninth_class_cleaned.txt](data/whitney_roots_ninth_class_cleaned.txt)

## Parsing Verbal Roots Info

In [50]:
# Headings that appear in the cleaned Whitney text file.
CLASS_HEADER = "6. nā-class"
EARLIER_LANGUAGE_HEADER = "A. Earlier Language"
EARLIER_AND_LATER_LANGUAGE_HEADER = "B. Earlier and Later Language"
LATER_LANGUAGE_HEADER = "C. Later Language"

# Stem-final vowels distinguishing the strong and the weak stem.
NINTH_CLASS_STRONG_MARKER = "ā"
NINTH_CLASS_WEAK_MARKER = "ī"

# One dict per parsed stem line, in file order.
whitney_roots = []

# Period of the section currently being read; stays None until the first
# period heading has been seen.
language_period = None

# The file contains diacritics, so decode it explicitly as UTF-8 rather than
# relying on the platform's default encoding.
with open("data/whitney_roots_ninth_class_cleaned.txt", "r", encoding="utf-8") as whitney_file:
    for line in whitney_file:
        variant_no = None
        attestation_texts = None
        weak_only = False

        line = line.rstrip()

        # Blank lines and the class heading carry no data.
        if not line or CLASS_HEADER in line:
            continue
        # A period heading switches the period applied to the lines that follow.
        elif EARLIER_LANGUAGE_HEADER in line:
            language_period = "Earlier"
            continue
        elif EARLIER_AND_LATER_LANGUAGE_HEADER in line:
            language_period = "Earlier & Later"
            continue
        elif LATER_LANGUAGE_HEADER in line:
            language_period = "Later"
            continue

        # Data line layout: [variant number] stem [attestation texts ...]
        line_parts = line.split()
        if line_parts[0].isdigit():
            variant_no = line_parts.pop(0)
        stem = line_parts.pop(0)
        if line_parts:
            attestation_texts = " ".join(line_parts)

        # A stem listed with the weak vowel is recorded as weak-only.
        if stem.endswith(NINTH_CLASS_WEAK_MARKER):
            weak_only = True

        whitney_roots.append({
            # Drop the final two characters (nasal + class vowel),
            # e.g. "ubhnā" -> "ubh".
            "root": stem[:-2],
            "variant_no": variant_no,
            "stem": stem,
            "weak_only": weak_only,
            "attestation_texts": attestation_texts,
            "language_period": language_period,
        })

In [17]:
import pandas

In [51]:
# Tabulate the parsed root records and save them without the row index, so the
# CSV holds only the parsed columns.
df_whitney_roots = pandas.DataFrame(whitney_roots)
df_whitney_roots.to_csv("data/whitney_roots_ninth_class.csv", index=False)
df_whitney_roots.head()

Unnamed: 0,root,variant_no,stem,weak_only,attestation_texts,language_period
0,i,,inī,True,V.,Earlier
1,is,,isṇā,False,,Earlier
2,ubh,,ubhnā,False,V.,Earlier
3,uṣ,,uṣṇā,False,V.,Earlier
4,kṣi,,kṣiṇā,False,V.B.,Earlier


## Annotating Verbal Roots with Rig Veda Attestations

Using Lubotsky's concordance, attestation info is manually added to [data/whitney_roots_ninth_class.csv](data/whitney_roots_ninth_class.csv).

Final results are in [data/roots_ninth_class.csv](data/roots_ninth_class.csv).

In [102]:
# keep_default_na=False keeps empty cells as "" instead of NaN, so the string
# operations applied to these columns later (.str.len(), .split()) work on
# every row.
df_roots = pandas.read_csv(
    "data/roots_ninth_class.csv",
    keep_default_na=False,
)

In [103]:
# TODO remove test df once we have all the annotations
# Until then, keep only the rows whose "notes" field is non-empty.
has_notes = df_roots["notes"].str.len() > 0
df_roots_test = df_roots[has_notes]
df_roots_test.head()

Unnamed: 0,root,variant_no,stem,weak_only,attestation_texts,language_period,rig_veda_weak_attestations,rig_veda_strong_attestations,lubotsky_page_no,notes
1,is,,isṇā,False,,Earlier,,1.63.2d,1:,iSnAsi
18,vṛ,1.0,vṛṇī,True,V.,Earlier,1.180.4b 1.67.1b 4.25.3a,,2:1338-1339,avRNItam vRnIte vRnIte(accented - last syll)
37,pu,,punā,False,,Earlier & Later,9.16.3c 9.67.27d,1.133.1a 10.13.3d,1:900-,punIhi puNAmi


## Organizing Data by Verse Lines (pādas)

In [135]:
# pprint is used by the cells below to display sample records; without this
# import they fail with NameError on a fresh kernel.
from pprint import pprint

In [143]:
# One record per attested verse line: the root's columns plus the line number
# and whether the attested form uses the weak or the strong stem.
rv_lines = []

roots = df_roots_test.to_dict("records")

for root_record in roots:
    # Take the attestation columns out of the record so they are not copied
    # onto every line; weak attestations are listed before strong ones.
    line_nos_by_stem_type = {
        "weak": root_record.pop("rig_veda_weak_attestations").split(),
        "strong": root_record.pop("rig_veda_strong_attestations").split(),
    }

    for stem_type, line_nos in line_nos_by_stem_type.items():
        for line_no in line_nos:
            entry = {"line_no": line_no, "stem_type": stem_type} | root_record
            if stem_type == "weak":
                # The stem from Whitney carries the strong vowel whenever a
                # strong stem is attested, so swap in the weak marker here.
                entry["stem"] = entry["stem"][:-1] + NINTH_CLASS_WEAK_MARKER
            rv_lines.append(entry)

pprint(rv_lines[0])

{'attestation_texts': '',
 'language_period': 'Earlier',
 'line_no': '1.63.2d',
 'lubotsky_page_no': '1:',
 'notes': 'iSnAsi',
 'root': 'is',
 'stem': 'isṇā',
 'stem_type': 'strong',
 'variant_no': '',
 'weak_only': False}


## Annotating Verse Lines

In [154]:
def annotate_line(line):
    """Return a copy of a verse-line record with placeholder annotation fields.

    Args:
        line: dict describing one attested verse line.

    Returns:
        A new dict holding every key of ``line`` plus "text",
        "meter_scansion" and "meter_name", each currently set to the
        placeholder string "TODO".
    """
    # Build a new dict instead of mutating the argument: the input records stay
    # un-annotated, so re-running this step never depends on (or inherits key
    # order from) an earlier run.
    return {
        **line,
        "text": "TODO",
        "meter_scansion": "TODO",
        "meter_name": "TODO",
    }

# Annotate every collected line, then show the first record as a spot check.
rv_lines_annotated = list(map(annotate_line, rv_lines))
pprint(rv_lines_annotated[0])

{'attestation_texts': '',
 'language_period': 'Earlier',
 'line_no': '1.63.2d',
 'lubotsky_page_no': '1:',
 'meter_name': 'TODO',
 'meter_scansion': 'TODO',
 'notes': 'iSnAsi',
 'root': 'is',
 'stem': 'isṇā',
 'stem_type': 'strong',
 'text': 'TODO',
 'variant_no': '',
 'weak_only': False}


## Saving the Final Line Results

In [155]:
# Tabulate the annotated line records and save them without the row index.
df_rv_lines = pandas.DataFrame(rv_lines_annotated)
df_rv_lines.to_csv("data/rv_lines_ninth_class.csv", index=False)
df_rv_lines.head(100)

Unnamed: 0,line_no,stem_type,root,variant_no,stem,weak_only,attestation_texts,language_period,lubotsky_page_no,notes,text,meter_name,meter_scansion
0,1.63.2d,strong,is,,isṇā,False,,Earlier,1:,iSnAsi,TODO,TODO,TODO
1,1.180.4b,weak,vṛ,1.0,vṛṇī,True,V.,Earlier,2:1338-1339,avRNItam vRnIte vRnIte(accented - last syll),TODO,TODO,TODO
2,1.67.1b,weak,vṛ,1.0,vṛṇī,True,V.,Earlier,2:1338-1339,avRNItam vRnIte vRnIte(accented - last syll),TODO,TODO,TODO
3,4.25.3a,weak,vṛ,1.0,vṛṇī,True,V.,Earlier,2:1338-1339,avRNItam vRnIte vRnIte(accented - last syll),TODO,TODO,TODO
4,9.16.3c,weak,pu,,punī,False,,Earlier & Later,1:900-,punIhi puNAmi,TODO,TODO,TODO
5,9.67.27d,weak,pu,,punī,False,,Earlier & Later,1:900-,punIhi puNAmi,TODO,TODO,TODO
6,1.133.1a,strong,pu,,punā,False,,Earlier & Later,1:900-,punIhi puNAmi,TODO,TODO,TODO
7,10.13.3d,strong,pu,,punā,False,,Earlier & Later,1:900-,punIhi puNAmi,TODO,TODO,TODO
