# Analyzing the meter

## Load the data

In [178]:
import pandas

In [179]:
# Load the per-line data set. keep_default_na=False makes pandas keep empty
# cells as empty strings instead of converting them to NaN (the plain call
# `pandas.read_csv("data/rv_lines.csv")` would do the latter); the queries
# further down compare against "".
df_rv_lines = pandas.read_csv(
    "data/rv_lines.csv",
    keep_default_na=False,
)

# Ad-hoc inspection snippets, kept for reference:
#   df_rv_lines.head()
#   df_rv_lines.dtypes
#   df_rv_lines.query('stanza_meter != ""')                                 # lines with meter set
#   df_rv_lines.query('stanza_meter == "Jagatī"').sort_values('line_no')    # one specific meter

df_rv_lines

Unnamed: 0,line_no,stem,stem_type,root_guess,variant_no,weak_only,attestation_texts,language_period,present_class,root,...,word_gloss,text_padapatha,text_samhitapatha,meter_scansion,stanza_meter,stanza_strata,stanza_late_addition,hymn_absolute_no,hymn_addressee,hymn_group
0,01.063.02.d,iṣṇā,strong,iṣ,,False,,Earlier,ninth,iṣ 1,...,2.SG.PRS.IND.ACT,púraḥ iṣṇā́si= puruhūta pūrvī́ḥ,púra iṣṇā́si puruhūta pūrvī́ḥ,SS LLS SSLS LL,Triṣṭubh,A,,63,Indra,"Hymns of Nodhas, Descendant of Gotama"
1,01.063.04.b,ubhnā,strong,ubh,,False,V.,Earlier,ninth,ubh,...,3.SG.PRS.INJ.ACT,vr̥trám yát vajrin= vr̥ṣakarman ubhnā́ḥ,vr̥tráṁ yád vajrin vr̥ṣakarman ubhnā́ḥ,LL L LL SSLS LL,Triṣṭubh,A,,63,Indra,"Hymns of Nodhas, Descendant of Gotama"
2,04.019.04.c,ubhnā,strong,ubh,,False,V.,Earlier,ninth,ubh,...,3.SG.IPRF.IND.ACT,dr̥ḷhā́ni aubhnāt= uśámānaḥ ójaḥ,dr̥r̥ḷhā́ni+ aubhnād uśámāna ójo,SLLS LL SSLS LL,,S,,315,Indra,Hymns to Indra
3,04.018.12.d,kṣiṇā,strong,kṣi,,False,V.B.,Earlier,ninth,kṣī,...,2.SG.IPRF.IND.ACT,yát prá ákṣiṇāḥ= pitáram pādagŕ̥hya,yát prā́kṣiṇāḥ pitáram pādagŕ̥hya,L LSL SSL LSLS,Triṣṭubh,P,"['Grassmann (G)', 'Arnold (C1)']",314,"Dialogue Between Indra, Aditi and Vamadeva",Hymns to Indra
4,10.027.04.d,kṣiṇā,strong,kṣi,,False,V.B.,Earlier,ninth,kṣī,...,1.SG.PRS.INJ.ACT,prá tám kṣiṇām= párvate-_ pādagŕ̥hya,prá táṁ kṣiṇām párvate pādagŕ̥hya,S L SL LSL LSLS,Triṣṭubh,P,['Arnold (C1)'],853,Indra,The Vasukra Hymns
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
876,09.097.04.b,hino,strong,hi,,False,,Earlier & Later,fifth,hi,...,2.PL.PRS.IMP.ACT,sómam hinota= mahaté-_} dhánāya,sómaṁ hinota mahaté dhánāya,LL SLS SSL SLS,Triṣṭubh,N,,809,Soma,The Tristubh Group
877,10.016.09.a,hino,strong,hi,,False,,Earlier & Later,fifth,hi,...,1.SG.PRS.IND.ACT,kravyā́dam agním= prá hinomi dūrám,kravyā́dam agním prá hiṇomi dūráṁ,LLS LL S SLS LL,Triṣṭubh,P,['Arnold (C2)'],842,Fire of Cremation,Yama Hymns
878,10.030.07.d,hino,strong,hi,,False,,Earlier & Later,fifth,hi,...,2.PL.PRS.IMP.ACT,devamā́danam= prá hinotana āpaḥ,devamā́danam prá hiṇotanāpaḥ,LSLSL S SLSLL,Triṣṭubh,N,,856,The Waters,Hymns of Kavasa Ailusa
879,10.030.08.a,hino,strong,hi,,False,,Earlier & Later,fifth,hi,...,2.PL.PRS.IMP.ACT,prá asmai hinota= mádhumantam ūrmím,prā́smai hinota mádhumantam ūrmíṁ,LL SLS SSLS LL,Triṣṭubh,N,,856,The Waters,Hymns of Kavasa Ailusa


## Syllabification, scansion, meter correctness and stem position

In [180]:
# Local helper module: meter.py, in the same folder as this notebook.
import meter

# Re-import the module each time this cell runs, so that edits made to
# meter.py during testing are picked up without restarting the kernel.
import importlib
importlib.reload(meter)


def add_meter_analysis(line):
    """Annotate one row with the metrical analysis produced by ``meter.analyze``.

    Meant to be used with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    line : mapping (a row Series when called through ``apply``)
        Must provide ``text_samhitapatha``, ``stanza_meter``, ``stem`` and
        ``meter_scansion``.

    Returns
    -------
    The same ``line`` object, with the ``meter_*`` fields and
    ``text_samhitapatha_normalized`` added (the input is modified in place).

    Raises
    ------
    IndexError
        If fewer than two stem positions are reported for the line.
    """
    result = meter.analyze(line["text_samhitapatha"], line["stanza_meter"], line["stem"])

    # The guessed stanza meter (result["stanza_meter_guessed"], when present) is
    # deliberately NOT written back to line["stanza_meter"]: even a very permissive
    # guess does not alter the core result (count of nA/nI in expected short
    # syllables), and the guess assignment would need care anyway.
    # TODO if this is ever enabled, mark the value as guessed?

    # FIXME check if avagraha (o_' / o_a) is present in our lines of interest and
    # decide how to deal with the 'o' vowel, e.g. by listing lines where
    # "o a" in line["text_samhitapatha"].

    # --- scansion results -------------------------------------------------
    # TODO rename meter_scansion_custom as the default meter scansion
    scansion_fields = (
        ("meter_scansion_custom", "scansion"),
        ("meter_has_restorations", "has_restorations"),
        ("meter_caesura_position", "caesura_position"),
        ("meter_syllables_count", "no_of_syllables"),
        ("meter_syllables", "syllables"),
        ("meter_notes", "notes"),
    )
    for column, key in scansion_fields:
        line[column] = result[key]

    # Does our scansion agree with the one shipped in the data? Mismatching
    # rows are inspected in the validation section by querying the dataframe.
    own_scansion = result["scansion_syllables"]
    line["meter_scansions_match"] = own_scansion == meter.clean_meter_scansion(line["meter_scansion"])

    # --- correctness, normalized text, stem location ------------------------
    remaining_fields = (
        # fields related to meter correctness
        ("meter_is_correct", "is_correct"),
        ("meter_faults", "faults"),
        ("meter_fault_positions", "fault_positions"),
        # text that reflects the final metrical analysis
        ("text_samhitapatha_normalized", "text_normalized"),
        # where in the meter the stem is and which variant of the stem was found
        ("meter_stem_found", "search_term_found"),
        ("meter_stem_positions", "search_term_positions"),
        ("meter_stem_fault_positions", "search_term_fault_positions"),
    )
    for column, key in remaining_fields:
        line[column] = result[key]

    # --- derived positions ------------------------------------------------
    # TODO! check for multiple matches in one line
    stem_positions = line["meter_stem_positions"]
    line["meter_stem_vowel_position"] = stem_positions[-1]       # last syllable of the stem
    line["meter_stem_root_vowel_position"] = stem_positions[-2]  # the syllable just before it
    stem_vowel_position = line["meter_stem_vowel_position"]
    root_vowel_position = line["meter_stem_root_vowel_position"]

    # TODO remove this?
    # One label per faulty position inside the stem, joined with spaces.
    line["meter_stem_fault_position_labels"] = " ".join(
        "stem_vowel" if faulty == stem_vowel_position
        else "root_vowel" if faulty == root_vowel_position
        else "preceding_syllable"
        for faulty in line["meter_stem_fault_positions"]
    )

    # --- expected scansion at the stem vowel and at the root vowel ----------
    expected_scansion_targets = (
        ("meter_stem_vowel_position_scansion_expected", "meter_stem_vowel_position"),
        ("meter_stem_root_vowel_position_scansion_expected", "meter_stem_root_vowel_position"),
    )
    for column, position_column in expected_scansion_targets:
        line[column] = meter.get_expected_scansion(
            line[position_column], line["stanza_meter"], line["meter_caesura_position"]
        )

    return line


# Run the metrical analysis on every line; axis=1 makes apply() hand the
# function one row at a time.
df_rv_lines_annotated = df_rv_lines.apply(add_meter_analysis, axis=1)

# TODO remove this from here: temporarily here for testing
# index=False (rather than index=None): to_csv documents `index` as a boolean,
# so state explicitly that the row index must not be written.
df_rv_lines_annotated.to_csv("data/rv_lines_with_meter.csv", index=False)

df_rv_lines_annotated.head()

Unnamed: 0,line_no,stem,stem_type,root_guess,variant_no,weak_only,attestation_texts,language_period,present_class,root,...,meter_fault_positions,text_samhitapatha_normalized,meter_stem_found,meter_stem_positions,meter_stem_fault_positions,meter_stem_vowel_position,meter_stem_root_vowel_position,meter_stem_fault_position_labels,meter_stem_vowel_position_scansion_expected,meter_stem_root_vowel_position_scansion_expected
0,01.063.02.d,iṣṇā,strong,iṣ,,False,,Earlier,ninth,iṣ 1,...,[],púra iṣṇā́si puruhūta pūrvī́ḥ,iṣṇā,"[3, 4]",[],4,3,,X,X
1,01.063.04.b,ubhnā,strong,ubh,,False,V.,Earlier,ninth,ubh,...,[],vr̥tráṁ yád vajrin vr̥ṣakarman ubhnā́ḥ,ubhnā,"[10, 11]",[],11,10,,X,L
2,04.019.04.c,ubhnā,strong,ubh,,False,V.,Earlier,ninth,ubh,...,[],dr̥̄ḷhā́ni aubhnād uśámāna ójo,ubhnā,"[4, 5]",[],5,4,,,
3,04.018.12.d,kṣiṇā,strong,kṣi,,False,V.B.,Earlier,ninth,kṣī,...,[],yát prā́kṣiṇāḥ pitáram pādagŕ̥hya,kṣiṇā,"[2, 3, 4]",[],4,3,,X,X
4,10.027.04.d,kṣiṇā,strong,kṣi,,False,V.B.,Earlier,ninth,kṣī,...,[],prá táṁ kṣiṇām párvate pādagŕ̥hya,kṣiṇā,"[3, 4]",[],4,3,,X,X


## Validation

### Checking our scansion yields same final result as vedaweb

In [181]:
# Columns displayed in the validation and analysis tables of this notebook.
COLUMNS_SELECTED = [
    # identification and text
    "line_no",
    "stem",
    "text_padapatha",
    "text_samhitapatha",
    "text_samhitapatha_normalized",
    # scansion: the one from the data set and our own
    "meter_scansion",
    "meter_scansion_custom",
    "meter_syllables",
    "meter_syllables_count",
    "stanza_meter",
    "meter_caesura_position",
    "meter_has_restorations",
    "meter_notes",
    "meter_scansions_match",
    # metrical correctness
    "meter_is_correct",
    "meter_faults",
    # stem location
    "meter_stem_found",
    "meter_stem_positions",
    "meter_stem_root_vowel_position",
    "meter_stem_vowel_position",
    "meter_stem_root_vowel_position_scansion_expected",
    "meter_stem_vowel_position_scansion_expected",
    # fault locations
    "meter_fault_positions",
    "meter_stem_fault_positions",
    "meter_stem_fault_position_labels",
]

# Lines for which our scansion and the scansion in the data set disagree.
df_rv_lines_annotated.loc[
    df_rv_lines_annotated["meter_scansions_match"].eq(False),
    COLUMNS_SELECTED,
]

Unnamed: 0,line_no,stem,text_padapatha,text_samhitapatha,text_samhitapatha_normalized,meter_scansion,meter_scansion_custom,meter_syllables,meter_syllables_count,stanza_meter,...,meter_faults,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position_scansion_expected,meter_fault_positions,meter_stem_fault_positions,meter_stem_fault_position_labels
2,04.019.04.c,ubhnā,dr̥ḷhā́ni aubhnāt= uśámānaḥ ójaḥ,dr̥r̥ḷhā́ni+ aubhnād uśámāna ójo,dr̥̄ḷhā́ni aubhnād uśámāna ójo,SLLS LL SSLS LL,LLS LL_SSLS LL,"[dr̥̄, ḷhā́, ni, aubh, nā, d u, śá, mā, na, ó,...",11,,...,,ubhnā,"[4, 5]",4,5,,,[],[],
58,03.049.02.d,minā mīnā,pr̥thujráyāḥ= amināt ā́yuḥ dásyoḥ,pr̥thujráyā aminād ā́yu@ dásyoḥ,pr̥thujráyā aminād ā́yu dásyoḥ,SLSL SSL LL LL,"SLSL ,SSL|_LS LL","[pr̥, thuj, rá, yā, a, mi, nā, d ā́, yu, dás, ...",11,Triṣṭubh,...,,minā,"[6, 7]",6,7,S,X,[],[],
111,09.011.06.b,śrīṇī,dadhnā́ ít abhí śrīṇītana,dadhnéd abhí śriṇītana@,dadhnéd abhí śriṇītana,LL SL SLSL,LL_S_L|SLSS,"[dadh, né, d a, bhí ś, ri, ṇī, ta, na]",8,Gāyatrī,...,,śriṇī,"[4, 5, 6]",5,6,S,L,[],[],
163,01.048.04.d,gr̥ṇā,nā́ma gr̥ṇāti nr̥ṇā́m,nā́ma gr̥ṇāti nr̥r̥ṇáam+,nā́ma gr̥ṇāti nr̥̄ṇáam,LS SLS SSSL,LS SLS LSL,"[nā́, ma, gr̥, ṇā, ti, nr̥̄, ṇá, am]",8,,...,,gr̥ṇā,"[3, 4]",3,4,,,[],[],
363,01.174.09.b,r̥ṇo,r̥ṇóḥ apáḥ sīrā́ḥ ná+_ srávantīḥ,r̥ṇór apáḥ ̀ sīrā́ ná srávantīḥ,r̥ṇór apáḥ ̀ sīrā́ ná srávantīḥ,SL SS · LL L SLL,"SL_SL · ,LL |_LSLL","[r̥, ṇó, r a, páḥ, ̀, sī, rā́, ná s, rá, van,...",11,Triṣṭubh,...,scansion_post_caesura=LL,r̥ṇo,"[1, 2]",1,2,X,X,[7],[],
365,06.020.12.b,r̥ṇo,r̥ṇóḥ apáḥ sīrā́ḥ ná+_ srávantīḥ,r̥ṇór apáḥ ̀ sīrā́ ná srávantīḥ,r̥ṇór apáḥ ̀ sīrā́ ná srávantīḥ,SL SS · LL L SLL,"SL_SL · ,LL |_LSLL","[r̥, ṇó, r a, páḥ, ̀, sī, rā́, ná s, rá, van,...",11,Triṣṭubh,...,scansion_post_caesura=LL,r̥ṇo,"[1, 2]",1,2,X,X,[7],[],
493,10.034.14.a,kr̥ṇu,mitrám kr̥ṇudhvam= khálu mr̥ḷátā+ naḥ,mitráṁ kr̥ṇudhvaṁ khálu mr̥r̥ḷátā+ no,mitráṁ kr̥ṇudhvaṁ khálu mr̥̄ḷátā no,LL SLL SS SSSL L,LL SLL SS LSL L,"[mit, ráṁ, kr̥, ṇudh, vaṁ, khá, lu, mr̥̄, ḷá, ...",11,,...,,kr̥ṇu,"[3, 4]",3,4,,,[],[],
681,01.010.08.d,dhunu dhūnu,sám gā́ḥ asmábhyam dhūnuhi,sáṁ gā́ asmábhya@ dhūnuhi,sáṁ gā́ asmábhya dhūnuhi,L L LLL LSS,L L LL|S LSS,"[sáṁ, gā́, as, mábh, ya, dhū, nu, hi]",8,Aṇuṣṭubh,...,,dhūnu,"[6, 7]",6,7,L,S,[],[],
769,08.084.03.b,śr̥ṇu,nr̥ŕ̥n pāhi śr̥ṇudhī́?_+ gíraḥ,nr̥ŕ̥m̐ḥ pāhi śr̥ṇudhī́ gíraḥ,nr̥̄́m̐ḥ pāhi śr̥ṇudhī́ gíraḥ,SL LS SSL SL,L_LS SSL SL,"[nr̥̄́m̐, ḥ pā, hi, śr̥, ṇu, dhī́, gí, raḥ]",8,,...,,śr̥ṇu,"[4, 5]",4,5,,,[],[],


The differences above have the following causes and can be safely ignored:

* Vedaweb does not clean the character `@` (used to mark [edited forms](https://lrc.la.utexas.edu/books/rigveda/RV00#bolle)) from the vnh text before applying metrical scansion while we do.
* Vedaweb currently treats `ḷh` as combination of two characters while we correctly treat it as single.
* Vedaweb currently treats `ḷ` between consonants as a consonant too, while we correctly treat it as the vowel `l̥`. Note: instances of `ḷ` in our lines are all consonantal (as in 04.019.04.c, 07.007.03.b, 10.089.06.d, 02.013.10.c), hence we don't actually see this in action in the above table.
* Vedaweb treats sequences `r̥r̥` and `r̥ŕ̥` as different chars, while we correctly treat them as single unit r̥̄ and r̥̄́ respectively (`ŕ̥r̥` is still treated as different chars which is needed for the meter). As in: 04.019.04.c, 01.048.04.d (as seen in the above table too)
* Vedaweb treats sequences like `paḥ` as short before a metrical pause, while we correctly treat them as long.

Note: [04.019.04.c](https://vedaweb.uni-koeln.de/rigveda/view/id/4.19.4) doesn't have a meter specified but other stanzas in the hymn are Triṣṭubh, and once we treat `r̥r̥` as a unit, it actually follows the meter.

#### Text normalization notes

`l̥` and `r̥̄` are not actually to be found in the vnh text we have, since it's transliterated using IAST, which [merges](https://en.wikipedia.org/wiki/International_Alphabet_of_Sanskrit_Transliteration#Comparison_with_ISO_15919) vowel `l̥` with consonant `ḷ` and, for `r̥̄`, writes `r̥r̥` instead. This has consequences for the meter, so we normalize the instances of those before we do the metrical analysis.

Examples demonstrating this behaviour in vedaweb:

https://vedaweb.uni-koeln.de/rigveda/view/id/10.157.02  
https://vedaweb.uni-koeln.de/rigveda/view/id/01.030.15

In the texas version, we don't see these issues:

https://lrc.la.utexas.edu/books/rigveda/RV10#H157  
https://lrc.la.utexas.edu/books/rigveda/RV01#H030

Similarly, we normalize `ch` spelling to `cch`.

### Checking no of lines that have had metrical restorations

Lines that have metrical restorations done ([e.g.](https://vedaweb.uni-koeln.de/rigveda/view/id/9.11.6)). For more details, see https://lrc.la.utexas.edu/books/rigveda/RV00#bolle

In [182]:
# Lines for which the analysis reports metrical restorations.
df_rv_lines_annotated.loc[
    df_rv_lines_annotated["meter_has_restorations"].eq(True),
    COLUMNS_SELECTED,
]

Unnamed: 0,line_no,stem,text_padapatha,text_samhitapatha,text_samhitapatha_normalized,meter_scansion,meter_scansion_custom,meter_syllables,meter_syllables_count,stanza_meter,...,meter_faults,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position_scansion_expected,meter_fault_positions,meter_stem_fault_positions,meter_stem_fault_position_labels
2,04.019.04.c,ubhnā,dr̥ḷhā́ni aubhnāt= uśámānaḥ ójaḥ,dr̥r̥ḷhā́ni+ aubhnād uśámāna ójo,dr̥̄ḷhā́ni aubhnād uśámāna ójo,SLLS LL SSLS LL,LLS LL_SSLS LL,"[dr̥̄, ḷhā́, ni, aubh, nā, d u, śá, mā, na, ó,...",11,,...,,ubhnā,"[4, 5]",4,5,,,[],[],
31,05.005.05.c,pr̥ṇī,prá-pra yajñám pr̥ṇītana,prá-pra yajñám pr̥ṇītana,prápra yajñám pr̥ṇītana,LS LL SLSS,LS LL |SLSS,"[práp, ra, yaj, ñám, pr̥, ṇī, ta, na]",8,Gāyatrī,...,,pr̥ṇī,"[5, 6]",5,6,S,L,[],[],
48,01.025.01.c,minī mīnī,minīmási dyávi-dyavi,minīmási dyávi-dyavi,minīmási dyávidyavi,SLSL SLSS,SLS_L|SLSS,"[mi, nī, má, si d, yá, vid, ya, vi]",8,Gāyatrī,...,,minī,"[1, 2]",1,2,X,X,[],[],
55,02.012.05.c,minā mīnā,sáḥ aryáḥ puṣṭī́ḥ= víjaḥ iva ā́} mināti,só aryáḥ puṣṭī́r víja 'vā́@ mināti,só aryáḥ puṣṭī́r víja 'vā́ mināti,L LL LL SS L SLS,"L LL LL ,SS |L SLS","[só, ar, yáḥ, puṣ, ṭī́r, ví, ja, 'vā́, mi, nā,...",11,Triṣṭubh,...,,minā,"[9, 10]",9,10,S,L,[],[],
58,03.049.02.d,minā mīnā,pr̥thujráyāḥ= amināt ā́yuḥ dásyoḥ,pr̥thujráyā aminād ā́yu@ dásyoḥ,pr̥thujráyā aminād ā́yu dásyoḥ,SLSL SSL LL LL,"SLSL ,SSL|_LS LL","[pr̥, thuj, rá, yā, a, mi, nā, d ā́, yu, dás, ...",11,Triṣṭubh,...,,minā,"[6, 7]",6,7,S,X,[],[],
111,09.011.06.b,śrīṇī,dadhnā́ ít abhí śrīṇītana,dadhnéd abhí śriṇītana@,dadhnéd abhí śriṇītana,LL SL SLSL,LL_S_L|SLSS,"[dadh, né, d a, bhí ś, ri, ṇī, ta, na]",8,Gāyatrī,...,,śriṇī,"[4, 5, 6]",5,6,S,L,[],[],
131,01.155.04.a,gr̥ṇī,tát-tat ít asya= paúṁsyam} gr̥ṇīmasi,tát-tad íd asya paúṁsiyaṁ gr̥ṇīmasi,táttad íd asya paúṁsiyaṁ gr̥ṇīmasi,LS S LS LSL SLSS,LS_S_LS LSL SLSS,"[tát, ta, d í, d as, ya, paúṁ, si, yaṁ, gr̥, ṇ...",12,,...,,gr̥ṇī,"[9, 10]",9,10,,,[],[],
132,01.186.03.a,gr̥ṇī,préṣṭham vaḥ átithim gr̥ṇīṣe-_,práyiṣṭhaṁ+ vo ̀ átithiṁ gr̥ṇīṣe,práyiṣṭhaṁ vo ̀ átithiṁ gr̥ṇīṣe,SLL L · SSL SLL,"SLL L · ,SS|L SLL","[prá, yiṣ, ṭhaṁ, vo, ̀, á, ti, thiṁ, gr̥, ṇī,...",11,Triṣṭubh,...,,gr̥ṇī,"[9, 10]",9,10,S,L,[],[],
134,02.020.04.a,gr̥ṇī,tám u+_ stuṣe-_= índram tám} gr̥ṇīṣe-_,tám u stuṣa índaraṁ+ táṁ gr̥ṇīṣe,tám u stuṣa índaraṁ táṁ gr̥ṇīṣe,S L SS LSL L SLL,"S_LSS ,LSL |L SLL","[tá, m u s, tu, ṣa, ín, da, raṁ, táṁ, gr̥, ṇī,...",11,Triṣṭubh,...,,gr̥ṇī,"[9, 10]",9,10,S,L,[],[],
138,03.006.10.b,gr̥ṇī,yajñáṁ-yajñam= abhí vr̥dhé-_} gr̥ṇītáḥ,yajñáṁ-yajñam abhí vr̥dhé gr̥ṇītáḥ,yajñáṁyajñam abhí vr̥dhé gr̥ṇītáḥ,LLLS SS SL SLL,LLLS_SS SL SLL,"[yaj, ñáṁ, yaj, ña, m a, bhí, vr̥, dhé, gr̥, ṇ...",11,,...,,gr̥ṇī,"[9, 10]",9,10,,,[],[],


We need to be mindful of these lines during our analysis.

### Checking no of lines that have other caesura positions eligible

In [183]:
# Lines whose analysis notes mention an alternative caesura position.
df_rv_lines_annotated.loc[
    df_rv_lines_annotated["meter_notes"].str.contains("caesura_position_alt"),
    COLUMNS_SELECTED,
]

Unnamed: 0,line_no,stem,text_padapatha,text_samhitapatha,text_samhitapatha_normalized,meter_scansion,meter_scansion_custom,meter_syllables,meter_syllables_count,stanza_meter,...,meter_faults,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position_scansion_expected,meter_fault_positions,meter_stem_fault_positions,meter_stem_fault_position_labels
36,01.125.05.b,pr̥ṇā,yáḥ pr̥ṇā́ti= sá ha devéṣu gacchati,yáḥ pr̥ṇā́ti sá ha devéṣu gachati,yáḥ pr̥ṇā́ti sá ha devéṣu gacchati,L SLS S S LLS LSS,"L SLS ,S S L|LS LSS","[yáḥ, pr̥, ṇā́, ti, sá, ha, de, vé, ṣu, gac, c...",12,Jagatī,...,,pr̥ṇā,"[2, 3]",2,3,X,X,[],[],
37,02.030.07.c,pr̥ṇā,yáḥ me-_ pr̥ṇā́t= yáḥ dádat yáḥ} nibódhāt,yó me pr̥ṇā́d yó dádad yó nibódhād,yó me pr̥ṇā́d yó dádad yó nibódhād,L L SL L SL L SLL,"L L SL ,L SL |L SLL","[yó, me, pr̥, ṇā́d, yó, dá, dad, yó, ni, bó, d...",11,Triṣṭubh,...,,pr̥ṇā,"[3, 4]",3,4,X,X,[],[],
40,06.047.15.a,pr̥ṇā,káḥ īm stavat= káḥ pr̥ṇāt káḥ} yajāte?_,ká īṁ stavat káḥ pr̥ṇāt kó yajāte,ká īṁ stavat káḥ pr̥ṇāt kó yajāte,S L SL L SL L SLL,"S L SL ,L SL |L SLL","[ká, īṁ, sta, vat, káḥ, pr̥, ṇāt, kó, ya, jā, te]",11,Triṣṭubh,...,,pr̥ṇā,"[6, 7]",6,7,S,X,[],[],
56,03.030.12.a,minā mīnā,díśaḥ sū́ryaḥ= ná mināti} prádiṣṭāḥ,díśaḥ sū́ryo ná mināti prádiṣṭā,díśaḥ sū́ryo ná mināti prádiṣṭā,SL LL S SLL SLL,"SL LL ,S SL|_LSLL","[dí, śaḥ, sū́r, yo, ná, mi, nā, ti p, rá, diṣ,...",11,Triṣṭubh,...,,minā,"[6, 7]",6,7,S,X,[],[],
117,02.033.15.b,hr̥ṇī,yáthā deva= ná hr̥ṇīṣé?_} ná háṁsi,yáthā deva ná hr̥ṇīṣé ná háṁsi,yáthā deva ná hr̥ṇīṣé ná háṁsi,SL LS S SLL S LS,"SL LS ,S SL|L S LS","[yá, thā, de, va, ná, hr̥, ṇī, ṣé, ná, háṁ, si]",11,Triṣṭubh,...,,hr̥ṇī,"[6, 7]",6,7,S,X,[],[],
184,06.009.02.a,jānā,ná ahám tántum= ná ví jānāmi ótum,nā́háṁ tántuṁ ná ví jānāmi ótuṁ,nā́háṁ tántuṁ ná ví jānāmi ótuṁ,LL LL S S LLS LL,"LL LL ,S S L|LS LL","[nā́, háṁ, tán, tuṁ, ná, ví, jā, nā, mi, ó, tuṁ]",11,Triṣṭubh,...,,jānā,"[7, 8]",7,8,X,L,[],[],
185,06.009.03.a,jānā,sáḥ ít tántum= sá ví jānāti ótum,sá ít tántuṁ sá ví jānāti ótuṁ,sá ít tántuṁ sá ví jānāti ótuṁ,S L LL S S LLS LL,"S L LL ,S S L|LS LL","[sá, ít, tán, tuṁ, sá, ví, jā, nā, ti, ó, tuṁ]",11,Triṣṭubh,...,,jānā,"[7, 8]",7,8,X,L,[],[],
364,06.018.05.d,r̥ṇo,r̥ṇóḥ púraḥ= ví dúraḥ asya víśvāḥ,r̥ṇóḥ púro ví dúro asya víśvāḥ,r̥ṇóḥ púro ví dúro asya víśvāḥ,SL SL S SL LS LL,"SL SL ,S SL |LS LL","[r̥, ṇóḥ, pú, ro, ví, dú, ro, as, ya, víś, vāḥ]",11,Triṣṭubh,...,,r̥ṇo,"[1, 2]",1,2,X,X,[],[],
563,04.050.09.c,kr̥ṇo,avasyáve?_= yáḥ várivaḥ} kr̥ṇóti,avasyáve yó várivaḥ kr̥ṇóti,avasyáve yó várivaḥ kr̥ṇóti,SLSL L SSL SLS,"SLSL ,L SS|L SLS","[a, vas, yá, ve, yó, vá, ri, vaḥ, kr̥, ṇó, ti]",11,Triṣṭubh,...,,kr̥ṇo,"[9, 10]",9,10,S,L,[],[],
634,01.055.07.d,dabhnu,ná tvā kétāḥ= ā́ dabhnuvanti bhū́rṇayaḥ,ná tvā kétā ā́ dabhnuvanti bhū́rṇayaḥ,ná tvā kétā ā́ dabhnuvanti bhū́rṇayaḥ,L L LL L LSLS LSL,"_LL LL ,L LS|LS LSL","[ná t, vā, ké, tā, ā́, dabh, nu, van, ti, bhū́...",12,Jagatī,...,scansion_post_caesura=LL,dabhnu,"[6, 7]",6,7,S,X,[6],[6],root_vowel


### Checking no of lines that have stanza meter not set

In [102]:
# total: 77 > 194
#df_rv_lines_annotated[COLUMNS_SELECTED].query('stanza_meter == ""')

## Analysis

### Lines not following the meter

In [184]:
# Lines flagged as not following the meter. Selecting on the fault text gives
# the same rows:
#   df_rv_lines_annotated[COLUMNS_SELECTED].query('meter_faults != ""')
df_rv_lines_annotated.loc[
    df_rv_lines_annotated["meter_is_correct"].eq(0),
    COLUMNS_SELECTED,
]
# number of such lines at the time of writing:
#   len(df_rv_lines_annotated[COLUMNS_SELECTED].query('meter_is_correct == 0'))  # 44

# 0 results for meter failures due to no of syllables
# means we don't have mismatches with what's assigned for the stanza and pada
# (van holland/nooten X -- can happen)
#   df_rv_lines_annotated[COLUMNS_SELECTED].query(
#       'meter_faults.str.contains("syllables")', engine='python'
#   )

Unnamed: 0,line_no,stem,text_padapatha,text_samhitapatha,text_samhitapatha_normalized,meter_scansion,meter_scansion_custom,meter_syllables,meter_syllables_count,stanza_meter,...,meter_faults,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position_scansion_expected,meter_fault_positions,meter_stem_fault_positions,meter_stem_fault_position_labels
21,10.145.04.a,gr̥bhṇā,nahí asyā+ nā́ma gr̥bhṇā́mi,nahy a\syā nā́ma gr̥bhṇā́mi,nahy a\syā nā́ma gr̥bhṇā́mi,L LL LS LLS,L_LL L|S LLS,"[nah, y a\s, yā, nā́, ma, gr̥bh, ṇā́, mi]",8,Aṇuṣṭubh,...,scansion_cadence=SLLX,gr̥bhṇā,"[6, 7]",6,7,L,S,[7],[7],stem_vowel
59,05.007.04.d,minā mīnā,prá smā+ minā́ti ajáraḥ,prá smā minā́ti ajáraḥ,prá smā minā́ti ajáraḥ,L L SLS SSL,_LL SL|S SSL,"[prá s, mā, mi, nā́, ti, a, já, raḥ]",8,Aṇuṣṭubh,...,scansion_cadence=SSSX,minā,"[3, 4]",3,4,X,X,[6],[],
62,07.036.04.c,minā mīnā,prá yáḥ manyúm= rírikṣataḥ} minā́ti,prá yó manyúṁ rírikṣato minā́ti,prá yó manyúṁ rírikṣato minā́ti,S L LL SLSL SLS,"S L LL ,SLS|L SLS","[prá, yó, man, yúṁ, rí, rik, ṣa, to, mi, nā́, ti]",11,Triṣṭubh,...,scansion_post_caesura=SL,minā,"[9, 10]",9,10,S,L,[6],[],
78,01.117.04.c,riṇī,sám tám riṇīthaḥ= víprutam} dáṁsobhiḥ,sáṁ táṁ riṇītho víprutaṁ dáṁsobhir,sáṁ táṁ riṇītho víprutaṁ dáṁsobhir,L L SLL LSL LLL,"L L SLL ,LS|L LLL","[sáṁ, táṁ, ri, ṇī, tho, víp, ru, taṁ, dáṁ, so,...",11,Triṣṭubh,...,scansion_cadence=LLLX,riṇī,"[3, 4]",3,4,X,X,[9],[],
81,01.124.07.d,riṇī,uṣā́ḥ hasrā́ iva= ní riṇīte?_ ápsaḥ,uṣā́ hasréva ní riṇīte ápsaḥ,uṣā́ hasréva ní riṇīte ápsaḥ,SL LLS S SLL LL,"SL LLS ,S S|LL LL","[u, ṣā́, has, ré, va, ní, ri, ṇī, te, áp, saḥ]",11,Triṣṭubh,...,scansion_cadence=LLLX,riṇī,"[7, 8]",7,8,S,L,[9],[],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,01.122.09.b,suno,apáḥ ná+_ vām= sunóti akṣṇayādhrúk,apó ná vāṁ sunóti akṣṇayādhrúk,apó ná vāṁ sunóti akṣṇayādhrúk,SL S L SLS LSLL,"SL S L ,SLS |LSLL","[a, pó, ná, vāṁ, su, nó, ti, ak, ṣṇa, yādh, rúk]",11,Triṣṭubh,...,scansion_post_caesura=SL,suno,"[5, 6]",5,6,X,S,[6],[6],stem_vowel
835,04.035.06.a,suno,yáḥ vaḥ sunóti= abhipitvé-_ áhnām,yó vaḥ sunóti abhipitvé áhnāṁ,yó vaḥ sunóti abhipitvé áhnāṁ,L L SLS SSLL LL,"L L SLS ,SS|LL LL","[yó, vaḥ, su, nó, ti, a, bhi, pit, vé, áh, nāṁ]",11,Triṣṭubh,...,scansion_cadence=LLLX,suno,"[3, 4]",3,4,X,X,[9],[],
860,01.061.04.a,hino,asmaí ít u+_ stómam sám hinomi,asmā́ íd u ̀ stómaṁ sáṁ hinomi,asmā́ íd u ̀ stómaṁ sáṁ hinomi,LL S S · LL L SLS,"LL S_S · ,LL |L SLS","[as, mā́, í, d u, ̀, stó, maṁ, sáṁ, hi, no, mi]",11,Triṣṭubh,...,scansion_post_caesura=LL,hino,"[9, 10]",9,10,S,L,[7],[],
863,02.014.04.d,hino,tám índram sómasya bhr̥thé?_} hinota,tám índraṁ sómasya bhr̥thé hinota,tám índraṁ sómasya bhr̥thé hinota,S LL LLS SL SLS,S_LL LLS S|L SLS,"[tá, m ín, draṁ, só, mas, ya, bhr̥, thé, hi, n...",11,Triṣṭubh,...,caesura_position=0,hino,"[9, 10]",9,10,S,L,[],[],


### Lines with variant stem attestations

In [185]:
# FIXME!! analyze fifth class forms too
# Lines where the stem form found in the text differs from the listed stem.
df_rv_lines_annotated.loc[
    df_rv_lines_annotated["meter_stem_found"].ne(df_rv_lines_annotated["stem"]),
    COLUMNS_SELECTED,
]

Unnamed: 0,line_no,stem,text_padapatha,text_samhitapatha,text_samhitapatha_normalized,meter_scansion,meter_scansion_custom,meter_syllables,meter_syllables_count,stanza_meter,...,meter_faults,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position_scansion_expected,meter_fault_positions,meter_stem_fault_positions,meter_stem_fault_position_labels
48,01.025.01.c,minī mīnī,minīmási dyávi-dyavi,minīmási dyávi-dyavi,minīmási dyávidyavi,SLSL SLSS,SLS_L|SLSS,"[mi, nī, má, si d, yá, vid, ya, vi]",8,Gāyatrī,...,,minī,"[1, 2]",1,2,X,X,[],[],
49,10.134.07.a,minī mīnī,nákiḥ devāḥ minīmasi,nákir devā minīmasi,nákir devā minīmasi,SL LL SLSS,SL LL SLSS,"[ná, kir, de, vā, mi, nī, ma, si]",8,,...,,minī,"[5, 6]",5,6,,,[],[],
50,01.032.04.b,minā mīnā,ā́t māyínām= ámināḥ prá utá māyā́ḥ,ā́n māyínām ámināḥ prótá māyā́ḥ,ā́n māyínām ámināḥ prótá māyā́ḥ,L LSL SSL LS LL,"L LSL,_SSL |LS LL","[ā́n, mā, yí, nā, m á, mi, nāḥ, pró, tá, mā, y...",11,Triṣṭubh,...,,minā,"[6, 7]",6,7,S,X,[],[],
51,01.071.10.c,minā mīnā,nábhaḥ ná+_ rūpám= jarimā́} mināti,nábho ná rūpáṁ jarimā́ mināti,nábho ná rūpáṁ jarimā́ mināti,SL S LL SSL SLS,"SL S LL ,SS|L SLS","[ná, bho, ná, rū, páṁ, ja, ri, mā́, mi, nā, ti]",11,Triṣṭubh,...,,minā,"[9, 10]",9,10,S,L,[],[],
52,01.123.09.c,minā mīnā,r̥tásya yóṣā= ná mināti dhā́ma,r̥tásya yóṣā ná mināti dhā́ma,r̥tásya yóṣā ná mināti dhā́ma,SLS LL S SLS LS,"SLS LL ,S S|LS LS","[r̥, tás, ya, yó, ṣā, ná, mi, nā, ti, dhā́, ma]",11,Triṣṭubh,...,,minā,"[7, 8]",7,8,S,L,[],[],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
767,08.052.08.d,śr̥ṇu,kaṇvavát śr̥ṇudhī?_+ hávam,kaṇvavác chr̥ṇudhī hávam,kaṇvavác cchr̥ṇudhī hávam,LSL SSL SL,LSL SSL SL,"[kaṇ, va, vác, cchr̥, ṇu, dhī, há, vam]",8,,...,,cchr̥ṇu,"[4, 5]",4,5,,,[],[],
815,10.062.04.b,śr̥ṇo,dévaputrāḥ= r̥ṣayaḥ tát} śr̥ṇotana,dévaputrā r̥ṣayas tác chr̥ṇotana,dévaputrā r̥ṣayas tác cchr̥ṇotana,LSLL SSL L SLSS,LSLL SSL L SLSS,"[dé, va, put, rā, r̥, ṣa, yas, tác, cchr̥, ṇo,...",12,,...,,cchr̥ṇo,"[9, 10]",9,10,,,[],[],
856,10.016.01.d,hinu,átha īm enam= prá hinutāt} pitŕ̥bhyaḥ,áthem enam prá hiṇutāt pitŕ̥bhyaḥ,áthem enam prá hiṇutāt pitŕ̥bhyaḥ,SL LL S SSL SLL,"SL_LL ,S SS|L SLL","[á, the, m e, nam, prá, hi, ṇu, tāt, pi, tŕ̥bh...",11,Triṣṭubh,...,,hiṇu,"[6, 7]",6,7,S,X,[],[],
877,10.016.09.a,hino,kravyā́dam agním= prá hinomi dūrám,kravyā́dam agním prá hiṇomi dūráṁ,kravyā́dam agním prá hiṇomi dūráṁ,LLS LL S SLS LL,"LLS_LL ,S S|LS LL","[krav, yā́, da, m ag, ním, prá, hi, ṇo, mi, dū...",11,Triṣṭubh,...,,hiṇo,"[7, 8]",7,8,S,L,[],[],


Stems `minā mīnā` and `minī mīnī` are attested only in the form with the short vocalism of the root vowel (i.e. as `minā` and `minī`) -- Whitney reports the other (long-vowel) variant for later texts only.

On the other hand, for stems `prīṇā`, `prīṇī` and `śrīṇī`, we find them in the VNH text with short vowel variant -- something not indicated in Whitney. 

TODO also check that above stems also actually exist with long vowel variant in the samhita (VNH) text (i.e. actually do a query for lines with more than 1 stem form when grouped by root + stem type) 


```
# TODO just import and print this?
ALTERNATE_FORMS = {
    # sandhi variants
    "aśnā": "āśnā",   #-a a- > -ā-
    "śr̥ṇī": "cchr̥ṇī", #-n ś- > -ñcch-
    # sandhi variants (fifth class)
    "ūrṇu": "orṇu",   #-a ū- > -o-
    "śr̥ṇu": "cchr̥ṇu", #-n ś- > -ñcch-
    "śr̥ṇo": "cchr̥ṇo", #-n ś- > -ñcch-
    #
    # stem variants (root vowel shortened)
    "prīṇā": "priṇā",
    "prīṇī": "priṇī",
    "śrīṇī": "śriṇī",
    # stem variants (fifth class, n > ṇ)
    "hinu": "hiṇu",
    "hino": "hiṇo",
}
```

### Distribution of stem position in the meter

In [186]:
# Columns used for the stem-position tables.
COLUMNS_SELECTED2 = [
    "line_no",
    "text_samhitapatha",
    "stanza_meter",
    "present_class",
    "stem_type",
    "meter_stem_found",
    "meter_stem_positions",
    "meter_stem_root_vowel_position",
    "meter_stem_root_vowel_position_scansion_expected",
    "meter_stem_vowel_position",
    "meter_stem_vowel_position_scansion_expected",
    "meter_stem_fault_positions",
    #"meter_stem_fault_position_labels",
    "meter_fault_positions",
    "meter_faults",
    "meter_notes",
    "meter_has_restorations",
    "pada_label",
    "stanza_late_addition",
    "stanza_strata",
    "word",
    "word_gloss",
]

# Weak ninth-class stems in lines that have a stanza meter assigned, ordered
# by meter, stem form found and stratum.
df_rv_lines_annotated.loc[
    df_rv_lines_annotated["present_class"].eq("ninth")
    & df_rv_lines_annotated["stem_type"].eq("weak")
    & df_rv_lines_annotated["stanza_meter"].ne(""),
    COLUMNS_SELECTED2,
].sort_values(["stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"])
# optional continuation of the chain, currently disabled:
#   .groupby("stanza_meter").head(1000)

Unnamed: 0,line_no,text_samhitapatha,stanza_meter,present_class,stem_type,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position,...,meter_stem_fault_positions,meter_fault_positions,meter_faults,meter_notes,meter_has_restorations,pada_label,stanza_late_addition,stanza_strata,word,word_gloss
145,06.044.04.b,gr̥ṇīṣé śávasas pátim,Aṇuṣṭubh,ninth,weak,gr̥ṇī,"[1, 2]",1,X,2,...,[],[],,,False,M,,A,gr̥ṇīṣé,1.SG.PRS.IND.MED
123,01.010.04.b,abhí gr̥ṇīhi ā́ ruva,Aṇuṣṭubh,ninth,weak,gr̥ṇī,"[3, 4]",3,X,4,...,[],[],,,False,M,,N,gr̥ṇīhi,2.SG.PRS.IMP.ACT
189,09.067.27.d,*jā́tavedaḥ punīhí mā,Aṇuṣṭubh,ninth,weak,punī,"[5, 6]",5,S,6,...,[],[],,,True,MO,"['Oldenberg (O)', 'Wuest (W)']",N,punīhí,2.SG.PRS.IMP.ACT
190,09.067.27.c,*víśve devāḥ punītá mā,Aṇuṣṭubh,ninth,weak,punī,"[5, 6]",5,S,6,...,[],[],,,True,MO,"['Oldenberg (O)', 'Wuest (W)']",N,punītá,2.PL.PRS.IMP.ACT
251,05.020.03.a,hótāraṁ tvā vr̥ṇīmahe,Aṇuṣṭubh,ninth,weak,vr̥ṇī,"[5, 6]",5,S,6,...,[],[],,,False,M,,A,vr̥ṇīmahe,1.PL.PRS.IND.MED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,10.061.03.d,áśrīṇīta ̀ ādíśaṁ gábhastau,Triṣṭubh,ninth,weak,śrīṇī,"[1, 2, 3]",2,X,3,...,[],[],,,False,M,['Grassmann (G)'],A,áśrīṇīta,3.SG.IPRF.IND.MED
313,03.030.17.b,vr̥ścā́ mádhyam práti ágraṁ śr̥ṇīhi,Triṣṭubh,ninth,weak,śr̥ṇī,"[9, 10]",9,S,10,...,[],[],,,False,M,,N,śr̥ṇīhi,2.SG.PRS.IMP.ACT
311,10.087.10.b,tásya trī́ṇi práti śr̥ṇīhi ágrā,Triṣṭubh,ninth,weak,śr̥ṇī,"[7, 8]",7,X,8,...,[],[],caesura_position=0,,False,P,['Arnold (C2)'],P,śr̥ṇīhi,2.SG.PRS.IMP.ACT
312,10.087.10.c,tásyāgne pr̥ṣṭī́r hárasā śr̥ṇīhi,Triṣṭubh,ninth,weak,śr̥ṇī,"[9, 10]",9,S,10,...,[],[],,,False,P,['Arnold (C2)'],P,śr̥ṇīhi,2.SG.PRS.IMP.ACT


#### Breakdown by expected scansion

In [65]:
#df_rv_lines_annotated[COLUMNS_SELECTED2] \
#    .query('stem_type == "weak" and stanza_meter != ""') \
#    .sort_values(["meter_stem_vowel_position", "stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"]) \
#    .groupby("stanza_meter") \
#    .head(1000)

In [188]:
# Build the set of lines used for the counts below:
#   1. repeated lines (identical text_samhitapatha) are counted once, keeping
#      the occurrence with the lowest line_no;
#   2. lines without a stanza meter are dropped;
#   3. epic anustubhs (pada_label PE1, PE2, PE3a, PE3b) are dropped;
#   (TODO!) other exclusions still to be decided.
# TODO! should do pada_label check without the P
#
# De-duplication uses drop_duplicates() instead of groupby(...).first():
# GroupBy.first() takes the first *non-null value of each column* within a
# group, so it can combine values coming from different rows, whereas
# drop_duplicates() always keeps one complete row. set_index() + sort_index()
# keep the resulting frame shaped as before (indexed by text_samhitapatha,
# sorted by it).
#
# NOTE(review): duplicates are removed BEFORE the meter / pada_label filter, so
# a repeated line whose earliest occurrence is filtered out disappears entirely
# even if a later occurrence would qualify -- confirm this is intended.
df_rv_lines_annotated_filtered = (
    df_rv_lines_annotated
    .sort_values("line_no")
    .drop_duplicates(subset="text_samhitapatha", keep="first")
    .set_index("text_samhitapatha")
    .sort_index()
    .query(
        'stanza_meter != "" '
        'and pada_label not in ["PE3b", "PE3a", "PE2", "PE1"]'
        # further restriction, currently disabled:
        #' and stanza_meter == "Gāyatrī"'
    )
)

# without the extra filtering
#df_rv_lines_annotated_filtered = df_rv_lines_annotated \
#    .query('\
#        stanza_meter != "" \
#        #and stem_type == "weak" \
#    ')

# text_samhitapatha is the index of the filtered frame, not a column, so it
# cannot be part of the column selection.
COLUMNS_SELECTED3 = [
    column for column in COLUMNS_SELECTED2 if column != "text_samhitapatha"
]

# Number of lines per present class, expected scansion at the stem vowel
# position and stem type.
# alternative grouping:
#   .groupby(["meter_stem_vowel_position_scansion_expected", "stem_type", "meter_stem_vowel_position"])
(
    df_rv_lines_annotated_filtered[COLUMNS_SELECTED3]
    .groupby(["present_class", "meter_stem_vowel_position_scansion_expected", "stem_type"])
    .size()
    #.head(1000)
)

#df_rv_lines_annotated_filtered[COLUMNS_SELECTED2] \
#    .query('stem_type == "weak"') \
#    .value_counts("meter_stem_vowel_position_scansion_expected") \

#df_rv_lines_annotated_filtered["pada_label"].value_counts()
#df_rv_lines_annotated_filtered["text_samhitapatha"].value_counts()
#df_rv_lines_annotated_filtered["word"].value_counts()

present_class  meter_stem_vowel_position_scansion_expected  stem_type
fifth          L                                            strong        87
                                                            weak          13
               S                                            strong         3
                                                            weak         131
               X                                            strong       100
                                                            weak          56
ninth          L                                            strong        42
                                                            weak          75
               S                                            strong         1
                                                            weak           3
               X                                            strong        65
                                                            weak          51
dtype:

It is interesting that the weak stem is treated as short more often than the strong stem, but the counts are low here.

TODO see if a similar pattern is there for -no/nu- class, to be used further as control: we shouldn't find -no- in expected short, or -nu- in expected long -- for this, can use other classes too with long stem vowels i.e. reduplicated strong stems (or paradigms with I?)? also pattern of nu in archaic layers should match that of nI? (also need to look by overall shape of the word?)

In [67]:
# anustubh/gayatri 6th syllable: weak has more instances than strong, but no significance?
#df_rv_lines_annotated_filtered[COLUMNS_SELECTED3] \
#    .groupby(["stanza_meter", "stem_type", "meter_stem_vowel_position"]) \
#    .size()

TODO also add visualization by actual position numbers/stanza meter; any patterns there? association with general rhythm pattern of the meter? (Macdonell 440, Van Nooten tables)

#### Breakdown by expected scansion (over strata)

In [189]:
# Ninth class only: number of lines per expected scansion of the stem-vowel
# position, stem type and stanza stratum.
(
    df_rv_lines_annotated_filtered[COLUMNS_SELECTED3]
    .query("present_class == 'ninth'")
    .groupby(
        by=[
            "meter_stem_vowel_position_scansion_expected",
            "stem_type",
            "stanza_strata",
        ]
    )
    .size()
)

meter_stem_vowel_position_scansion_expected  stem_type  stanza_strata
L                                            strong     A                 4
                                                        C                11
                                                        N                 8
                                                        P                 7
                                                        S                 7
                                                        a                 2
                                                        n                 1
                                                        p                 1
                                                        s                 1
                                             weak       A                 9
                                                        C                13
                                                        N                29
                  

TODO better visualize distribution by stanza_strata

It seems that the use of the weak stem in long syllables jumps significantly over time, compared to that of the strong stem. TODO ensure that this is not because of a bias (e.g. thematic/person shifts)

In [190]:
# Fifth class only: number of lines per expected scansion of the stem-vowel
# position, stem type and stanza stratum.
(
    df_rv_lines_annotated_filtered[COLUMNS_SELECTED3]
    .query("present_class == 'fifth'")
    .groupby(
        by=[
            "meter_stem_vowel_position_scansion_expected",
            "stem_type",
            "stanza_strata",
        ]
    )
    .size()
)

meter_stem_vowel_position_scansion_expected  stem_type  stanza_strata
L                                            strong     A                 6
                                                        C                22
                                                        N                23
                                                        P                23
                                                        S                 6
                                                        a                 1
                                                        n                 4
                                                        p                 1
                                                        s                 1
                                             weak       A                 5
                                                        N                 2
                                                        P                 2
                  

In [191]:
# Drill-down into one cell of the strata breakdown: ninth-class weak stems whose
# stem vowel stands in an expected-long (L) position, stratum "N" only,
# ordered by the stem that was found.
(
    df_rv_lines_annotated_filtered[COLUMNS_SELECTED3]
    .query('\
        present_class== "ninth" \
        and stem_type == "weak" \
        and meter_stem_vowel_position_scansion_expected == "L" \
        and stanza_strata == "N" \
    ')
    .sort_values(by=["meter_stem_found"])
)
# alternative summaries of the same selection:
#    .groupby("meter_stem_found").size()
#    .groupby("line_no").size()


Unnamed: 0_level_0,line_no,stanza_meter,present_class,stem_type,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position,meter_stem_vowel_position_scansion_expected,meter_stem_fault_positions,meter_fault_positions,meter_faults,meter_notes,meter_has_restorations,pada_label,stanza_late_addition,stanza_strata,word,word_gloss
text_samhitapatha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
abhí yajñáṁ gr̥ṇīhi no,01.015.03.a,Gāyatrī,ninth,weak,gr̥ṇī,"[5, 6]",5,S,6,L,[],[],,,False,MO,,N,gr̥ṇīhi,2.SG.PRS.IMP.ACT
bhū́rer dātā́raṁ sátpatiṁ gr̥ṇīṣe,02.033.12.c,Triṣṭubh,ninth,weak,gr̥ṇī,"[9, 10]",9,S,10,L,[],[],,,False,M,,N,gr̥ṇīṣe,1.SG.PRS.IND.MED
sahasrasā́m ā́gniveśiṁ gr̥ṇīṣe,05.034.09.a,Triṣṭubh,ninth,weak,gr̥ṇī,"[9, 10]",9,S,10,L,[],[],,caesura_position_inside_syllable=True,False,M,['Grassmann (G)'],N,gr̥ṇīṣe,1.SG.PRS.IND.MED
chruṣṭī́ deṣṇám abhí gr̥ṇīhi rā́dhaḥ,02.009.04.b,Triṣṭubh,ninth,weak,gr̥ṇī,"[7, 8]",7,X,8,L,[],[],,caesura_position_inside_syllable=True,False,M,,N,gr̥ṇīhi,2.SG.PRS.IMP.ACT
ásunvatā sutapā́ḥ sáṁ gr̥ṇīte,04.025.07.b,Triṣṭubh,ninth,weak,gr̥ṇī,"[9, 10]",9,S,10,L,[],[],,,False,M,,N,gr̥ṇīte,3.SG.PRS.IND.MED
gīrbhír u ̀ sváyaśasaṁ gr̥ṇīmasi,10.092.14.b,Jagatī,ninth,weak,gr̥ṇī,"[9, 10]",9,S,10,L,[],[],,,False,M,,N,gr̥ṇīmasi,1.PL.PRS.IND.ACT
*jā́tavedaḥ punīhí mā,09.067.27.d,Aṇuṣṭubh,ninth,weak,punī,"[5, 6]",5,S,6,L,[],[],,,True,MO,"['Oldenberg (O)', 'Wuest (W)']",N,punīhí,2.SG.PRS.IMP.ACT
ágne téna punīhi naḥ,09.067.24.b,Gāyatrī,ninth,weak,punī,"[5, 6]",5,S,6,L,[],[],,,False,MO,"['Oldenberg (O)', 'Wuest (W)']",N,punīhi,2.SG.PRS.IMP.ACT
ágne dákṣaiḥ punīhi naḥ,09.067.26.c,Gāyatrī,ninth,weak,punī,"[5, 6]",5,S,6,L,[],[],,,False,MO,"['Oldenberg (O)', 'Wuest (W)']",N,punīhi,2.SG.PRS.IMP.ACT
yád āsiñcā́ óṣadhībhiḥ punītāt,10.030.05.d,Triṣṭubh,ninth,weak,punī,"[9, 10]",9,S,10,L,[],[],,,False,M,,N,punītāt,2.SG.PRS.IMP.ACT


### Lines with meter faults in the stem positions

In [192]:
# TODO add extra check for meter_stem_vowel_position in meter_stem_fault_positions
#df_rv_lines_annotated[COLUMNS_SELECTED2] \
#    .query('\
#        stanza_meter != "" and meter_faults != "" \
#        and meter_stem_fault_positions.str.len() != 0 \
#    ', engine='python'
#    ).sort_values(["stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"])

# no longer used
#df_rv_lines_annotated[COLUMNS_SELECTED2].query(
#    'meter_stem_fault_position_labels != ""'
#    # more specific
#    #'meter_stem_fault_position_labels.str.contains("stem_vowel")', engine='python'
#).sort_values(["stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"])

# Ninth-class lines whose stem vowel falls on a position where the meter expects
# a short (S) syllable. The stem vowel (-nā-/-nī-) is written long, so these are
# the lines where the stem vowel conflicts with the expected scansion.
(
    df_rv_lines_annotated[COLUMNS_SELECTED2]
    .query('meter_stem_vowel_position_scansion_expected == "S" and present_class == "ninth"')
    .sort_values(by=["stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"])
)

Unnamed: 0,line_no,text_samhitapatha,stanza_meter,present_class,stem_type,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position,...,meter_stem_fault_positions,meter_fault_positions,meter_faults,meter_notes,meter_has_restorations,pada_label,stanza_late_addition,stanza_strata,word,word_gloss
345,09.067.31.c,*sárvaṁ sá pūtám aśnāti,Aṇuṣṭubh,ninth,strong,aśnā,"[6, 7]",6,L,7,...,[7],[7],scansion_cadence=SLLX,,True,PE2,"['Grassmann (G)', 'Oldenberg (O)', 'Arnold (C2...",P,aśnāti,3.SG.PRS.IND.ACT
21,10.145.04.a,nahy a\syā nā́ma gr̥bhṇā́mi,Aṇuṣṭubh,ninth,strong,gr̥bhṇā,"[6, 7]",6,L,7,...,[7],[7],scansion_cadence=SLLX,,False,PE1,,P,gr̥bhṇā́mi,1.SG.PRS.IND.ACT
173,04.057.07.a,índraḥ sī́tāṁ ní gr̥hṇātu,Aṇuṣṭubh,ninth,strong,gr̥hṇā,"[6, 7]",6,L,7,...,[7],[7],scansion_cadence=SLLX,,False,PE2,"['Grassmann (G)', 'Oldenberg (O)', 'Arnold (C1...",P,gr̥hṇātu,3.SG.PRS.IMP.ACT
334,10.153.03.c,úd dyā́m astabhnā ójasā,Gāyatrī,ninth,strong,stabhnā,"[3, 4, 5]",4,X,5,...,[5],[5],scansion_cadence=LLSX,,False,P,,P,astabhnāḥ,2.SG.IPRF.IND.ACT
110,08.002.11.b,índremáṁ sómaṁ śrīṇīhi,Gāyatrī,ninth,weak,śrīṇī,"[6, 7]",6,L,7,...,[7],"[5, 7]",scansion_cadence=LLLX,,False,MT,,S,śrīṇīhi,2.SG.PRS.IMP.ACT
156,09.084.01.d,urukṣitaú gr̥ṇīhi daíviyaṁ jánam,Jagatī,ninth,weak,gr̥ṇī,"[5, 6]",5,X,6,...,[6],[6],scansion_post_caesura=SL,,False,M,,N,gr̥ṇīhi,2.SG.PRS.IMP.ACT
261,07.097.02.a,ā́ daíviyā vr̥ṇīmahe ávāṁsi,Triṣṭubh,ninth,weak,vr̥ṇī,"[5, 6]",5,X,6,...,[6],[6],scansion_post_caesura=SL,,False,M,,S,vr̥ṇīmahe,1.PL.PRS.IND.MED


Of the 4 strong cases, 3 are in Epic Aṇuṣṭubh and for 10.153.03.c, pada b is also irregular.

08.002.11.b is trochaic gayatri, pointing to short root vowel earlier.

Last two may be evidence of earlier short ni!

TODO investigate these in the context of their stanza/hymn

TODO also investigate lines that don't have meter set

TODO check the van Nooten and Holland notes for these lines. Also, for cadence faults, see how rare the cadence is in their statistical analyses (table, intro VIII). This can be extended to rhythm analysis in non-cadence positions too.

#### For the control data (fifth class)

In [193]:
# Control: fifth-class strong stems whose stem vowel falls on a position where
# the meter expects a short (S) syllable.
(
    df_rv_lines_annotated[COLUMNS_SELECTED2]
    .query('\
        meter_stem_vowel_position_scansion_expected == "S" \
        and present_class == "fifth" \
        and stem_type == "strong" \
    ')
    .sort_values(by=["stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"])
)

Unnamed: 0,line_no,text_samhitapatha,stanza_meter,present_class,stem_type,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position,...,meter_stem_fault_positions,meter_fault_positions,meter_faults,meter_notes,meter_has_restorations,pada_label,stanza_late_addition,stanza_strata,word,word_gloss
807,08.002.11.c,revántaṁ hí tvā śr̥ṇómi,Gāyatrī,fifth,strong,śr̥ṇo,"[6, 7]",6,L,7,...,"[6, 7]","[5, 6, 7]",scansion_cadence=LSLX,,False,MT,,S,śr̥ṇómi,1.SG.PRS.IND.ACT
361,01.035.09.d,abhí kr̥ṣṇéna rájasā dyā́m r̥ṇoti,Jagatī,fifth,strong,r̥ṇo,"[10, 11]",10,L,11,...,"[10, 11]","[9, 10, 11]",scansion_cadence=LLSLX,,False,D,,C,r̥ṇoti,3.SG.PRS.IND.ACT
832,01.122.09.b,apó ná vāṁ sunóti akṣṇayādhrúk,Triṣṭubh,fifth,strong,suno,"[5, 6]",5,X,6,...,[6],[6],scansion_post_caesura=SL,,False,M,['Grassmann (G)'],A,sunóti,3.SG.PRS.IND.ACT


In [194]:
# Control: fifth-class weak stems whose stem vowel falls on a position where
# the meter expects a long (L) syllable.
(
    df_rv_lines_annotated[COLUMNS_SELECTED2]
    .query('\
        meter_stem_vowel_position_scansion_expected == "L" \
        and present_class == "fifth" \
        and stem_type == "weak" \
    ')
    .sort_values(by=["stem_type", "stanza_meter", "meter_stem_found", "stanza_strata"])
)

Unnamed: 0,line_no,text_samhitapatha,stanza_meter,present_class,stem_type,meter_stem_found,meter_stem_positions,meter_stem_root_vowel_position,meter_stem_root_vowel_position_scansion_expected,meter_stem_vowel_position,...,meter_stem_fault_positions,meter_fault_positions,meter_faults,meter_notes,meter_has_restorations,pada_label,stanza_late_addition,stanza_strata,word,word_gloss
399,01.122.04.c,prá vo nápātam apáaṁ kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,caesura_position_inside_syllable=True,False,M,,A,kr̥ṇudhvam,2.PL.PRS.IMP.MED
403,01.186.10.a,prá ūaśvínāv ávase kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,caesura_position_inside_syllable=True,False,MS,,A,kr̥ṇudhvam,2.PL.PRS.IMP.MED
433,05.041.06.a,prá vo vāyúṁ rathayújaṁ kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,,False,M,,A,kr̥ṇudhvam,2.PL.PRS.IMP.MED
445,06.049.06.d,jágata sthātar jágad ā́ kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,,False,M,,A,kr̥ṇudhvam,2.PL.PRS.IMP.MED
505,10.074.05.a,śácīva índram ávase kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,caesura_position_inside_syllable=True,False,M,,A,kr̥ṇudhvam,2.PL.PRS.IMP.MED
432,05.028.03.c,sáṁ jāspatyáṁ suyámam ā́ kr̥ṇuṣva,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,,False,M,"['Oldenberg (o)', 'Wuest (W)']",N,kr̥ṇuṣva,2.SG.PRS.IMP.MED
482,09.091.05.b,suuktā́ya patháḥ kr̥ṇuhi prā́caḥ,Triṣṭubh,fifth,weak,kr̥ṇu,"[7, 8]",7,X,8,...,[8],"[6, 8, 9]",scansion_cadence=SLLX scansion_post_caesura=SL,,False,M,,N,kr̥ṇuhi,2.SG.PRS.IMP.ACT
369,10.101.02.b,nā́vam aritrapáraṇīṁ kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],caesura_position=0,,False,P,['Arnold (C1)'],P,kr̥ṇudhvam,2.PL.PRS.IMP.MED
507,10.085.20.d,siyonám pátye vahatúṁ kr̥ṇuṣva,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,,False,P,"['Grassmann (G)', 'Oldenberg (O)', 'Arnold (C2...",P,kr̥ṇuṣva,2.SG.PRS.IMP.MED
421,04.003.01.d,*dhíraṇyarūpam ávase kr̥ṇudhvam,Triṣṭubh,fifth,weak,kr̥ṇu,"[9, 10]",9,S,10,...,[],[],,caesura_position_inside_syllable=True,True,M,,S,kr̥ṇudhvam,2.PL.PRS.IMP.MED


Here, `kr̥ṇuhi` is the only instance where the stem vowel of `-nu-` stands in an open syllable in an expected long position.

In the other cases (`kr̥ṇudhvam` and `kr̥ṇuṣva`), `-nu-` is in a closed syllable and thus the expected long position is correctly fulfilled.

## Saving the Final Line Results

In [74]:
#df_rv_lines_annotated.to_csv("data/rv_lines_with_meter.csv", index=None)