In [14]:
import lib.get_assets as assets

In [2]:
import lib.model0_9 as m0_9

### Rule 1

In [3]:

# Rule 1 cases: Farsi word -> expected output of general_search when it is
# called with pos_neg='Verb'.
tests = [{'farsi': 'ییلاق\u200cنشین', 'trans': 'yeylAqneSin'},  # \u200c: zero-width non-joiner, written explicitly
         {'farsi': 'ییدیش', 'trans': 'yidiS'},
         {'farsi': 'یوونتوس', 'trans': 'yuventus'},
         {'farsi': 'یونید', 'trans': 'یونید'},  # case verb: expected output equals the input
         ]

for t in tests:
    result = m0_9.general_search(t['farsi'], pos_neg='Verb')
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 2 (Nouns)

In [4]:

# Rule 2 cases: Farsi input -> expected output of run_transcription_0.
tests = [{'farsi': 'پیامبرت', 'trans': 'payAmbarat'},
         {'farsi': 'مزايایی', 'trans': 'mazAyAyi'},
         {'farsi': 'بی شرف', 'trans': 'bi Saraf'},
         {'farsi': 'بی عقل', 'trans': 'bi \'aql'},
         ]

for t in tests:
    result = m0_9.run_transcription_0(t['farsi'])
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 3 (Verbs basis)

In [5]:

# Rule 3 cases: Farsi phrase -> expected output of run_transcription_0.
tests = [{'farsi': 'بخواهند بروند', 'trans': 'bexAhand beravand'},
         # Disabled cases, kept for reference with their original notes:
         # {'farsi': 'می‌تواند بخوابد', 'trans': 'mitavAnad bexAbad'}, # \u200c
         # {'farsi': 'نشد بپریم', 'trans': 'naSod beparim'}, # می
         # {'farsi': 'می روید', 'trans': 'miravid'}
         # {'farsi': 'می ترسد', 'trans': 'mitarsad'}
         ]

for t in tests:
    result = m0_9.run_transcription_0(t['farsi'])
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 4 (Frequency based prioritization for collisions)

In [6]:

# Rule 4: the same letter is expected to transcribe differently depending on
# which lookup is used.

# general_search lookup
tests = [{'farsi': 'ی', 'trans': 'ye'}]

for t in tests:
    result = m0_9.general_search(t['farsi'])
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        'general_search(%r): expected %r, got %r' % (t['farsi'], t['trans'], result)

# affix_search lookup
tests = [{'farsi': 'ی', 'trans': 'i'}]

for t in tests:
    result = m0_9.affix_search(t['farsi'])
    assert result == t['trans'], \
        'affix_search(%r): expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 5 (Verbs joined with _, e.g. خواهند_كرد)

In [8]:

# Rule 5, part 1: an underscore-joined verb is normalised and then passed to
# process_verb.
tests = [{'farsi': 'خواهند_كرد', 'trans': 'xAhand kard'}]

for t in tests:
    result = m0_9.process_verb(m0_9.normalise(t['farsi']))
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        'process_verb(normalise(%r)): expected %r, got %r' % (t['farsi'], t['trans'], result)

# Rule 5, part 2: space-separated verb phrases through run_transcription_0.
tests = [{'farsi': 'خواهند كرد', 'trans': 'xAhand kard'},
         {'farsi': 'بخواهند بروند', 'trans': 'bexAhand beravand'},
         ]

for t in tests:
    result = m0_9.run_transcription_0(t['farsi'])
    assert result == t['trans'], \
        'run_transcription_0(%r): expected %r, got %r' % (t['farsi'], t['trans'], result)
    

### Rule 6 (normalisation ي, ك)

In [9]:

# Rule 6 cases: input -> expected output of normalise. The input and expected
# strings differ only in their yeh / kaf letter variants, which look alike in
# many fonts.
tests = [{'farsi': 'گزارشي', 'trans': 'گزارشی'},
         {'farsi': 'ك', 'trans': 'ک'},
         ]

for t in tests:
    result = m0_9.normalise(t['farsi'])
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 7 (affix ی)

### Rule 8 (letter ۀ)

### Rule 9 (collision affix ات)

In [10]:

def rule_9(wrd, pos=None):
    """Transcribe a word that contains the letter sequence 'ات' (rule 9).

    A word without 'ات' is returned unchanged. Otherwise the word is split
    with ``m0_9.get_affixes`` around the first matching marker below, and the
    marker itself is rendered as 'At', or as "'at" when it is preceded by a
    zero-width non-joiner (\\u200c).

    Whichever side of the split is strictly longer is looked up with
    ``m0_9.general_search`` (forwarding ``pos`` as ``pos_pos``); the other
    side goes through ``m0_9.affix_search``. On a tie the suffix side is
    treated as the main word.

    :param wrd: word to transcribe
    :param pos: optional part-of-speech tag forwarded to general_search
    :return: the transcription, or ``wrd`` itself if 'ات' is absent
    """
    if 'ات' not in wrd:
        return wrd

    # Ordered from most to least specific; the last entry always matches
    # because of the guard above, so the loop always ends via ``break``.
    candidates = (
        ('ات\u200c', 'At'),
        ('\u200cات', '\'at'),
        ('ات', 'At'),
    )
    for marker, stem in candidates:
        if marker in wrd:
            break

    d_affixes = m0_9.get_affixes(wrd, marker)
    before = d_affixes['prefix']
    after = d_affixes['suffix']

    if len(before) > len(after):
        head = m0_9.general_search(before, pos_pos=pos)
        return head + stem + m0_9.affix_search(after)

    head = m0_9.affix_search(before)
    return head + stem + m0_9.general_search(after, pos_pos=pos)


# Rule 9 cases: word -> expected output of rule_9 after normalisation.
# \u200c is the zero-width non-joiner, written explicitly because it is
# invisible in most editors and decides which branch of rule_9 is taken.
tests = [{'farsi': 'مزخرفات', 'trans': 'mozaxrafAt'},
         {'farsi': 'خاطرات\u200cمان', 'trans': 'xAterAtemAn'},
         {'farsi': 'عمه\u200cات', 'trans': "'amme'at"},
         ]


for t in tests:
    result = rule_9(m0_9.normalise(t['farsi']))
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 10 (collision affix ان)

### Rule 11 (collision affix ش)

In [11]:

def rule_11(wrd, pos=None):
    """Transcribe a word ending in the affix 'ش' (rule 11).

    A word that does not end in 'ش' (including the empty string) is returned
    unchanged. Otherwise the final letter is stripped, the remainder is
    transcribed according to ``pos``, and the affix is appended:

    * ``pos == 'Noun'``: ``m0_9.process_noun(base) + 'aS'``
    * ``pos == 'Verb'``: ``m0_9.process_verb(base) + 'eS'``
    * anything else:     ``m0_9.general_search(base, pos_pos=pos) + 'eS'``

    :param wrd: word to transcribe
    :param pos: optional part-of-speech tag
    :return: the transcription, or ``wrd`` itself if the rule does not apply
    """
    # endswith() instead of wrd[-1]: indexing raised IndexError on ''.
    if not wrd.endswith('ش'):
        return wrd

    base = wrd[:-1]
    if pos == 'Noun':
        return m0_9.process_noun(base) + 'aS'
    if pos == 'Verb':
        return m0_9.process_verb(base) + 'eS'
    return m0_9.general_search(base, pos_pos=pos) + 'eS'

# Rule 11 cases: word + part of speech -> expected output of rule_11.
tests = [{'farsi': 'وسایلش', 'trans': 'vasAyelaS', 'pos': 'Noun'},
         {'farsi': 'بردندش', 'trans': 'bordandeS', 'pos': 'Verb'},
         ]

for t in tests:
    result = rule_11(t['farsi'], t['pos'])
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r (%s): expected %r, got %r' % (t['farsi'], t['pos'], t['trans'], result)


### Rule 12 (affix م)

### Rule 13 (affix مان)

### Rule 14 (affix می)

In [12]:

def rule_14(wrd, pos=None):
    """Transcribe a word carrying the affix 'می' (rule 14).

    * The word starts with 'می' followed by a zero-width non-joiner
      (\\u200c): returns ``'mi' + m0_9.process_wrd(rest, pos)``.
    * The word ends with 'می': returns
      ``m0_9.general_search(word_without_affix) + 'omi'``.
    * Otherwise (no 'می' at all, or 'می' only somewhere in the middle) the
      word is returned unchanged.

    :param wrd: word to transcribe
    :param pos: optional part-of-speech tag, forwarded to process_wrd
    :return: the transcription, or ``wrd`` itself if the rule does not apply
    """
    if 'می' not in wrd:
        return wrd

    # The non-joiner is invisible in most editors, so it is written as an
    # escape here rather than embedded literally.
    prefix = 'می\u200c'
    if wrd.startswith(prefix):
        return 'mi' + m0_9.process_wrd(wrd[len(prefix):], pos)
    if wrd.endswith('می'):
        return m0_9.general_search(wrd[:-len('می')]) + 'omi'

    # 'می' is present but is neither the prefix nor the suffix. This used to
    # fall off the end of the function and return None.
    return wrd
        
# Rule 14 cases: word (+ optional part of speech) -> expected output of rule_14.
# \u200c is the zero-width non-joiner, written explicitly because it is
# invisible in most editors and the prefix branch of rule_14 depends on it.
tests = [{'farsi': 'می\u200cتواند', 'trans': 'mitavAnad', 'pos': 'Verb'},
         {'farsi': 'چندمی', 'trans': 'Candomi'},
         ]

for t in tests:
    result = rule_14(t['farsi'], t.get('pos', None))
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 15 (affix آ)

### Rule 16 (affix ون)

### Rule 17 (affix ید)

In [15]:

def rule_17(wrd, pos=None):
    """Transcribe a verb ending in the affix 'ید' (rule 17).

    The rule only applies when ``pos == 'Verb'`` and the word ends in 'ید';
    any other input is returned unchanged. For a matching verb the affix is
    stripped and the remainder goes through ``m0_9.process_verb``. The ending
    is then chosen from the lemmatizer output (a '#'-separated string):

    * some lemma part is a prefix of the word -> ``+ 'id'``
    * no lemma part is a prefix of the word   -> ``+ 'yad'``

    :param wrd: word to transcribe
    :param pos: optional part-of-speech tag
    :return: the transcription, or ``wrd`` itself if the rule does not apply
    """
    affix = 'ید'
    # The affix is always stripped from the END of the word, so require it
    # there. The old substring test ('ید' in wrd) also matched verbs with
    # 'ید' in the middle and then chopped off two unrelated letters.
    if pos != 'Verb' or not wrd.endswith(affix):
        return wrd  # ??? (non-verb handling is still an open question)

    base = m0_9.process_verb(wrd[:-len(affix)])
    lemma_parts = assets.lemmatizer.lemmatize(wrd).split('#')
    if any(wrd.startswith(part) for part in lemma_parts):
        return base + 'id'
    return base + 'yad'
    
# Rule 17 cases: verb -> expected output of rule_17.
tests = [{'farsi': 'رفتید', 'trans': 'raftid', 'pos': 'Verb'},
         {'farsi': 'بگوید', 'trans': 'beguyad', 'pos': 'Verb'},
         # Disabled case, kept for reference:
         #{'farsi': 'می‌آید', 'trans': 'beguyad', 'pos': 'Verb'}
         ]

for t in tests:
    result = rule_17(t['farsi'], t.get('pos', None))
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r: expected %r, got %r' % (t['farsi'], t['trans'], result)


### Rule 18 (verb with یم)

In [16]:

# Rule 18 cases: word + part of speech -> expected output of process_wrd.
tests = [{'farsi': 'بپریم', 'trans': 'beparim', 'pos': 'Verb'},
         {'farsi': 'رفتیم', 'trans': 'raftim', 'pos': 'Verb'},
         {'farsi': 'خدایم', 'trans': 'xodAyam', 'pos': 'Noun'},
         ]

for t in tests:
    result = m0_9.process_wrd(t['farsi'], t.get('pos', None))
    # Report the offending case; a bare assert would only say "AssertionError".
    assert result == t['trans'], \
        '%r (%s): expected %r, got %r' % (t['farsi'], t.get('pos', None), t['trans'], result)


### Rule 19 (Semispace u200c, improve implementation in 0.9)

### Rule 20 (Recursive search, improvement implemented in 0.9)

### Rule 21 (affix ن)

### Rule 22 (affixes بی and نی)

### Rule 23 (root of the verb is رو)

### Rule 24 (Recursion, improve v0.9)