# Roots and Stems With Stanza Attestations

## Enriching list of Verbal Roots with Rigveda Attestations

In [29]:
import pandas

# Root/stem table; see 2_roots.ipynb for how this file is produced.
# keep_default_na=False leaves empty cells as "" instead of turning them into NaN.
df_roots = pandas.read_csv(
    "data/roots.csv",
    keep_default_na=False,
)

# One plain dict per CSV row.
roots = df_roots.to_dict(orient="records")
roots[0]  # sample

{'root_guess': 'i 2',
 'variant_no': '',
 'strong_stem': 'inā',
 'weak_stem': 'inī',
 'weak_only': True,
 'attestation_texts': 'V.',
 'language_period': 'Earlier',
 'present_class': 'ninth'}

In [16]:
# project-local module: the `src` package sits in the same folder as this notebook
import src.lib.roots_attestations as roots_attestations

# useful during testing to pick up changes in the file
# (reload re-executes the module, so edits made after the first import take effect)
import importlib
importlib.reload(roots_attestations)

# Two results come back (both are sampled in the cells below):
#   roots_with_attestations   - the input records with attestation fields added
#   roots_with_attested_words - attested word forms, indexed by present class and then root
# The call also prints one progress line per root, plus a note whenever the
# root guess is replaced by the form used in vedaweb.
roots_with_attestations, roots_with_attested_words = roots_attestations.get_attestations(roots)

[ninth] i 2: 0 strong, 0 weak attestations
note: using 'iṣ 1' for root guess iṣ (as done in vedaweb)
[ninth] iṣ 1: 1 strong, 0 weak attestations
[ninth] ubh: 2 strong, 0 weak attestations
[ninth] uṣ: 0 strong, 0 weak attestations
note: using 'kṣī' for root guess kṣi (as done in vedaweb)
[ninth] kṣī: 3 strong, 0 weak attestations
[ninth] gr̥: 0 strong, 0 weak attestations
note: using 'gr̥bhⁱ' for root guess gr̥bh (as done in vedaweb)
[ninth] gr̥bhⁱ: 10 strong, 6 weak attestations
[ninth] jū: 4 strong, 1 weak attestations
note: using 'jyā' for root guess jī (as done in vedaweb)
[ninth] jyā: 3 strong, 0 weak attestations
[ninth] dr̥: 0 strong, 0 weak attestations
[ninth] drū: 0 strong, 0 weak attestations
note: using 'pr̥̄ 1' for root guess pr̥ (as done in vedaweb)
[ninth] pr̥̄ 1: 12 strong, 6 weak attestations
[ninth] pruṣ: 0 strong, 0 weak attestations
[ninth] bhrī: 0 strong, 0 weak attestations
note: using 'mī 1' for root guess mi mī (as done in vedaweb)
[ninth] mī 1: 23 strong, 2 weak

Now let's look at some of the sample results.

In [27]:
# third record of the enriched list; the output below shows the fields added on
# top of the original CSV columns: root, strong/weak attestations and their totals
roots_with_attestations[2] # sample

{'root_guess': 'ubh',
 'variant_no': '',
 'strong_stem': 'ubhnā',
 'weak_stem': 'ubhnī',
 'weak_only': False,
 'attestation_texts': 'V.',
 'language_period': 'Earlier',
 'present_class': 'ninth',
 'root': 'ubh',
 'strong_attestations': '01.063.04 04.019.04',
 'strong_attestations_total': 2,
 'weak_attestations': '',
 'weak_attestations_total': 0}

In [26]:
# indexed by present class, then root; as the output below shows, each root has
# 'strong' and 'weak' mappings from a stanza reference to a list of word entries,
# each entry holding the attested word and its grammatical gloss
roots_with_attested_words["ninth"]["ubh"] # sample

{'strong': {'01.063.04': [{'word': 'ubhnā́ḥ',
    'gloss': {'voice': 'ACT',
     'number': 'SG',
     'mood': 'INJ',
     'person': '3',
     'tense': 'PRS'}}],
  '04.019.04': [{'word': 'aubhnāt',
    'gloss': {'voice': 'ACT',
     'number': 'SG',
     'mood': 'IND',
     'person': '3',
     'position': 'intermediate',
     'tense': 'IPRF'}}]},
 'weak': {}}

## Saving the Results

In [30]:
import json

# ensure_ascii=False writes the transliterated forms (ā, ṣ, r̥, ...) as literal
# characters, so the file encoding is pinned to UTF-8 instead of being left to
# the platform default, which is not UTF-8 everywhere (e.g. Windows locales).
with open("data/roots_with_attested_words.json", "w", encoding="utf-8") as f:
    json.dump(roots_with_attested_words, f, indent=2, ensure_ascii=False)

import pandas

# One row per root record. The positional index carries no information, so it is
# left out of the CSV (index=False is the documented way to say so).
df_roots_with_attestations = pandas.DataFrame.from_dict(roots_with_attestations)
df_roots_with_attestations.to_csv("data/roots_with_attestations.csv", index=False)
df_roots_with_attestations.head()

Unnamed: 0,root_guess,variant_no,strong_stem,weak_stem,weak_only,attestation_texts,language_period,present_class,root,strong_attestations,strong_attestations_total,weak_attestations,weak_attestations_total
0,i 2,,inā,inī,True,V.,Earlier,ninth,i 2,,0,,0
1,iṣ,,iṣṇā,iṣṇī,False,,Earlier,ninth,iṣ 1,01.063.02,1,,0
2,ubh,,ubhnā,ubhnī,False,V.,Earlier,ninth,ubh,01.063.04 04.019.04,2,,0
3,uṣ,,uṣṇā,uṣṇī,False,V.,Earlier,ninth,uṣ,,0,,0
4,kṣi,,kṣiṇā,kṣiṇī,False,V.B.,Earlier,ninth,kṣī,04.018.12 10.027.04 10.027.13,3,,0


## Validation

### Checking for attested roots

In [32]:
# Roots with at least one RV attestation of either the strong or the weak stem,
# ordered by present class, language period and root.
df_roots_with_attestations.loc[
    lambda frame: frame["strong_attestations_total"].gt(0)
    | frame["weak_attestations_total"].gt(0)
].sort_values(by=["present_class", "language_period", "root"])

Unnamed: 0,root_guess,variant_no,strong_stem,weak_stem,weak_only,attestation_texts,language_period,present_class,root,strong_attestations,strong_attestations_total,weak_attestations,weak_attestations_total
62,dabh,,dabhno,dabhnu,False,V.B.,Earlier,fifth,dabh,,0,01.055.07,1
63,dāś,,dāśno,dāśnu,False,V.,Earlier,fifth,dāś,08.004.06,1,,0
54,i 2,,ino,inu,False,V.,Earlier,fifth,i 2,01.066.10 04.010.07 04.016.07 06.004.03 06.005...,7,06.010.07 09.029.04,2
59,ji,,jino,jinu,False,V.B.,Earlier,fifth,ji 2 jinv,05.084.01,1,,0
57,kr̥,,kr̥ṇo,kr̥ṇu,False,,Earlier,fifth,kr̥,07.018.05 01.013.12 01.018.08 01.031.07 01.048...,113,10.101.02 01.182.03 02.026.02 04.017.10 05.083...,142
66,pruṣ,,pruṣṇo,pruṣṇu,False,V.,Earlier,fifth,pruṣⁱ,,0,01.168.08 06.071.01 10.023.04,3
56,r̥ 1,,r̥ṇo,r̥ṇu,False,V.,Earlier,fifth,r̥ 1,01.030.14 01.030.15 01.035.09 01.174.02 01.174...,9,05.045.06,1
69,sagh,,saghno,saghnu,False,V.,Earlier,fifth,sagh,01.031.03,1,,0
76,spr̥,,spr̥ṇo,spr̥ṇu,False,,Earlier,fifth,spr̥,,0,10.087.07,1
55,u 1,,uno,unu,False,V.,Earlier,fifth,u 1,05.031.01,1,,0


TODO: explain the cases where `root_guess` and `root` differ.

E.g. iṣ 'send' is automatically marked with 1 (`iṣ 1`) from VedaWeb, which is the same numbering Lubotsky gives.
Whitney actually has this root as 2 in his main root list, but since the stem does not have variants with it, it is not marked with a number later.

### Checking for missing roots

In [33]:
# Tip: to render a frame without the index column on the left, wrap it as
# IPython.display.HTML(df_roots_with_attestations.to_html(index=False)).

# Roots with no RV attestation for either the strong or the weak stem,
# ordered by present class, language period and root.
df_roots_with_attestations.loc[
    lambda frame: frame["strong_attestations_total"].eq(0)
    & frame["weak_attestations_total"].eq(0)
].sort_values(by=["present_class", "language_period", "root"])

Unnamed: 0,root_guess,variant_no,strong_stem,weak_stem,weak_only,attestation_texts,language_period,present_class,root,strong_attestations,strong_attestations_total,weak_attestations,weak_attestations_total
53,akṣ,,akṣṇo,akṣṇu,False,V.B.,Earlier,fifth,akṣ,,0,,0
61,dagh,,daghno,daghnu,False,B.,Earlier,fifth,dagh,,0,,0
58,kṣubh,,kṣubhno,kṣubhnu,False,B.,Earlier,fifth,kṣubh,,0,,0
68,lu,,luno,lunu,False,B.S.,Earlier,fifth,lu,,0,,0
64,pi,,pino,pinu,False,V.B.,Earlier,fifth,pi,,0,,0
65,pr̥ 1,1.0,pr̥ṇo,pr̥ṇu,False,S.,Earlier,fifth,pr̥ 1,,0,,0
67,ri,,riṇo,riṇu,False,B.,Earlier,fifth,ri,,0,,0
71,sadh,,sadhno,sadhnu,False,B.,Earlier,fifth,sadh,,0,,0
70,si,,sino,sinu,False,V.B.,Earlier,fifth,si,,0,,0
72,skabh,,skabhno,skabhnu,False,B.,Earlier,fifth,skabh,,0,,0


TODO: Test these with a different length of the final root vowel, just to see if we catch anything.

Done, and it didn't catch anything.