# Roman to Urdu Transliteration

In [1]:
Rules = {
      "A": "ا",
      "AA": "آ",
      "B": "ب",
      "P":"پ",
      "T":"ت",
      "J":"ج",
      "S":"س",
      "CH":"چ",
      "H":"ه",
      "KH":"خ",
      "D":"د",
      "Z":"ذ",
      "R":"ر",
      "SH":"ش",
      "GH":"غ",
      "F":"ف",
      "K":"ک",
      "G":"گ",
      "L":"ل",
      "M":"م",
      "N":"ن",
      "O":"و",
      "Y":"ی",
      "E":"ے",
}

In [2]:
def transString(string, reverse=0):
    '''Given a Unicode string, transliterate into Buckwalter. To go from
    Buckwalter back to Unicode, set reverse=1'''

    for k, v in Rules.items():
          if not reverse:
                string = string.replace(k, v)
          else:
                string = string.replace(v, k)

    return string


In [3]:
# Rule 1 . . .
'''
Except ‘a’, ‘e’, ‘i’, ‘o’, ‘u’, ‘y’ and ‘h’, 
change the case of all the characters of rom_word into capital. 
'''
def rule_1(sentence):
    expect = {'a','e','i','o','u','y','h'}
    words = sentence.split(" ")
    output = " "
    for i in range(len(words)):
        word = words[i]
        modified = ""
        for k in range(len(word)):
            if word[k] in expect:
                modified += word[k]
            else:
                modified += (word[k]).upper()
        words[i] = modified
        output.join(words[i])

    print("Rule 1 >: ",words)
    return output.join(words)

In [4]:
# Rule 2 . . . 
'''
If the two consequent capital letters are the same, delete one of those double letters
'''
def rule_2(sentence):
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        word = words[i]
        modified = ""
        for k in range(1,len(word)):
            if word[k-1] == word[k] and word[k].isupper():
                continue
            else:
                modified += word[k-1]
        modified += word[k]
        words[i] = modified
    print("Rule 2 >: ",words)
    return output.join(words)

In [5]:
# Rule 3 . . . 
'''
If the word begins with a vowel, append ‘A’ at the beginning of the word.
'''
def rule_3(sentence):
    vowal = {'a','e','i','o','u'}
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        word = words[i]
        modified = ""
        if word[0] in vowal:
            modified += 'A'
        modified += word
        words[i] = modified
    print("Rule 3 >: ",words)
    return output.join(words)

In [6]:
# Rule 4
'''
For the sequences ‘eh’ and ‘oh’, do the following replacements. Consider the longest match at left hand side.
    ehe = eHe, H
    eh = eH, H
    oh = oH, H
    h = H
'''
def rule_4(sentence):
    import re
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        h = re.compile(r'h')
        words[i] = h.sub(r'H', words[i])

        oh = re.compile(r'oH')
        words[i] = oh.sub(r'H', words[i])

        eh = re.compile(r'eH')
        words[i] = eh.sub(r'H', words[i])

        ehe = re.compile(r'eHe')
        words[i] = ehe.sub(r'H', words[i])

    print("Rule 4 >: ",words)
    return output.join(words)

In [7]:
# Rule 5
'''
If ‘y’ is the last character of the word and is preceded by ‘e’ or ‘a’, ‘then
    ey = Y
    ay = E
'''
def rule_5(sentence):
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        word = words[i]
        modified = ""
        if word[-1] == 'y':
            if word[-2] == 'e':
                for k in range(len(word)-2):
                    modified += word[k]
                modified += 'Y'
                words[i] = modified
        if word[-1] == 'y':
            if word[-2] == 'a':
                for k in range(len(word)-2):
                    modified += word[k]
                modified += 'E'
                words[i] = modified
        
    print("Rule 5 >: ",words)
    return output.join(words)


In [8]:
# Rule 6 & 7
'''
If ‘y’ is preceded by ‘e’ or ‘a’ and followed by a vowel then
    ey = Y, eY
    ay = Y, aY

    I'm only doing y.sub('Y') aslo handle Rule 7
'''
def rule_6_and_7(sentence):
    import re
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        y = re.compile(r'y')
        words[i] = y.sub(r'Y', words[i])
    print("Rule 6&7 >: ",words)
    return output.join(words)

In [9]:
# Rule 8
'''
If the vowel sequence ‘ai’ or ‘ei’ is present at the end of the word, then apply following replacement.
    ai = E, aYi, aAi
    ei = E, eYi, eAi
'''
def rule_8(sentence):
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        word = words[i]
        modified = ""
        if word[-1] == 'i':
            if word[-2] == 'e' or word[-2] == 'a':
                for k in range(len(word)-2):
                    modified += word[k]
                modified += 'E'
                words[i] = modified
                 
    print("Rule 8 >: ",words)
    return output.join(words)


In [10]:
# Rule 9
'''
This rule is a generalized form of rule 8.
'''

'\nThis rule is a generalized form of rule 8.\n'

In [11]:
# Rule 10
'''
For two vowel sequence, do the following replacements.
    aa = A
    ai = Y
    ei = Y
    ee = Y
    ie = Y
    oo = O
    au = O
    ou = O
'''

def rule_10(sentence):
    import re
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        aa = re.compile(r'aa')
        words[i] = aa.sub(r'A', words[i])

        ai = re.compile(r'ai')
        words[i] = ai.sub(r'Y', words[i])

        ei = re.compile(r'ei')
        words[i] = ei.sub(r'Y', words[i])

        ee = re.compile(r'ee')
        words[i] = ee.sub(r'Y', words[i])

        ie = re.compile(r'ie')
        words[i] = ie.sub(r'Y', words[i])

        oo = re.compile(r'oo')
        words[i] = oo.sub(r'O', words[i])

        au = re.compile(r'au')
        words[i] = au.sub(r'O', words[i])

        ou = re.compile(r'ou')
        words[i] = ou.sub(r'O', words[i])

    print("Rule 10 >: ",words)
    return output.join(words)

In [12]:
# Rule 11
'''
11) Search the following vowels at word’s final position and . make the substitutions accordingly.
    e (at final) = E
    a (at final) = A, H
    i (at final) = Y
    u (at final) = O
'''
def rule_11(sentence):
    output = " "
    words = sentence.split(" ")
    for i in range(len(words)):
        word = words[i]
        modified = ""
        if word[-1] == 'e':
            for k in range(len(word)-1):
                modified += word[k]
            modified += 'E'
            words[i] = modified
        if word[-1] == 'a':
            for k in range(len(word)-1):
                modified += word[k]
            modified += 'A'
            words[i] = modified
        if word[-1] == 'i':
            for k in range(len(word)-1):
                modified += word[k]
            modified += 'Y'
            words[i] = modified
        if word[-1] == 'u':
            for k in range(len(word)-1):
                modified += word[k]
            modified += 'O'
            words[i] = modified
        
    import re
    output = " "
    words = sentence.split(" ")
    for j in range(len(words)):
        a = re.compile(r'a')
        words[j] = a.sub(r'', words[j])

        i = re.compile(r'i')
        words[j] = i.sub(r'', words[j])

        u = re.compile(r'u')
        words[j] = u.sub(r'', words[j])

        e = re.compile(r'e')
        words[j] = e.sub(r'E', words[j])

        o = re.compile(r'o')
        words[j] = o.sub(r'O', words[j])

#     print("Rule 12 >: ",words)
#     return output.join(words)
    print("Rule 11 >: ",words)
    return output.join(words)


In [13]:
# Rule 12
'''
Search for the following vowel sequences and make the following replacements.
    a = null, A
    i = null, Y
    u = null, O
    e = E
    o = O
'''
def rule_12(sentence):
    import re
    output = " "
    words = sentence.split(" ")
    for j in range(len(words)):
        a = re.compile(r'a')
        words[j] = a.sub(r'', words[j])

        i = re.compile(r'i')
        words[j] = i.sub(r'', words[j])

        u = re.compile(r'u')
        words[j] = u.sub(r'', words[j])

        e = re.compile(r'e')
        words[j] = e.sub(r'E', words[j])

        o = re.compile(r'o')
        words[j] = o.sub(r'O', words[j])

    print("Rule 12 >: ",words)
    return output.join(words)

In [46]:
sentence = "yeh kia hein"
real = sentence
sentence = rule_1 (sentence)
print(sentence)
sentence = rule_2(sentence)

sentence = rule_3(sentence)

sentence = rule_4(sentence)

sentence = rule_5(sentence)

sentence = rule_6_and_7(sentence)

sentence = rule_8(sentence)

sentence = rule_10(sentence)

sentence = rule_11(sentence)

sentence = rule_12(sentence)

Rule 1 >:  ['yeh', 'Kia', 'heiN']
yeh Kia heiN
Rule 2 >:  ['yeh', 'Kia', 'heiN']
Rule 3 >:  ['yeh', 'Kia', 'heiN']
Rule 4 >:  ['yH', 'Kia', 'HeiN']
Rule 5 >:  ['yH', 'Kia', 'HeiN']
Rule 6&7 >:  ['YH', 'Kia', 'HeiN']
Rule 8 >:  ['YH', 'Kia', 'HeiN']
Rule 10 >:  ['YH', 'Kia', 'HYN']
Rule 11 >:  ['YH', 'KiA', 'HYN']
Rule 12 >:  ['YH', 'KA', 'HYN']


In [44]:
print("Roman Urdu : ", real)
words = sentence.split(" ")
for k in range(len(words)):
    #print(words[k])
    print(transString(words[k],1) , end=" ")

Roman Urdu :  میرا نام عبداللہ ہے
MYRA NAM عBDALLہ ہE 

In [42]:
urdu_dict = {
            u"\u0627":"A",
            u"\u0627":"A", 
            u"\u0675":"A", 
            u"\u0673":"A", 
            u"\u0630":"A", 
            u"\u0622":"AA", 
            u"\u0628":"B", 
            u"\u067E":"P", 
            u"\u062A":"T", 
            u"\u0637":"T", 
            u"\u0679":"T", 
            u"\u062C":"J", 
            u"\u0633":"S", 
            u"\u062B":"S", 
            u"\u0635":"S", 
            u"\u0686":"CH", 
            u"\u062D":"H", 
            u"\u0647":"H", 
            u"\u0629":"H", 
            u"\u06DF":"H", 
            u"\u062E":"KH", 
            u"\u062F":"D", 
            u"\u0688":"D", 
            u"\u0630":"Z", 
            u"\u0632":"Z", 
            u"\u0636":"Z", 
            u"\u0638":"Z", 
            u"\u068E":"Z", 
            u"\u0631":"R", 
            u"\u0691":"R", 
            u"\u0634":"SH", 
            u"\u063A":"GH", 
            u"\u0641":"F", 
            u"\u06A9":"K", 
            u"\u0642":"K", 
            u"\u06AF":"G", 
            u"\u0644":"L", 
            u"\u0645":"M", 
            u"\u0646":"N", 
            u"\u06BA":"N", 
            u"\u0648":"O", 
            u"\u0649":"Y", 
            u"\u0626":"Y", 
            u"\u06CC":"Y", 

            u"\u06D2":"E", 
            u"\u06C1":"H",
            u"\u064A":"E"  ,
            u"\u06C2":"AH"  ,
            u"\u06BE":"H"  ,
            u"\u0639":"A"  ,
            u"\u0643":"K" ,
            u"\u0621":"A",
            u"\u0624":"O",
            u"\u060C":"" #seperator ulta comma
}

word = 'میرا نام عبداللہ ہے'
word1= 'YEH'
inv_map = {v: k for k, v in urdu_dict.items()}

for i, j in inv_map.items():
        word1 = word1.replace(i, j)
c=word1.join
print(c)

<built-in method join of str object at 0x000001FA15AD0AB0>


In [38]:
inv_map

{'A': 'ء',
 'Z': 'ڎ',
 'AA': 'آ',
 'B': 'ب',
 'P': 'پ',
 'T': 'ٹ',
 'J': 'ج',
 'S': 'ص',
 'CH': 'چ',
 'H': 'ھ',
 'KH': 'خ',
 'D': 'ڈ',
 'R': 'ڑ',
 'SH': 'ش',
 'GH': 'غ',
 'F': 'ف',
 'K': 'ك',
 'G': 'گ',
 'L': 'ل',
 'M': 'م',
 'N': 'ں',
 'O': 'ؤ',
 'Y': 'ی',
 'E': 'ي',
 'AH': 'ۂ',
 '': '،'}