# Phonetic Transliteration, Classical Approach

This notebook documents a now-abandoned attempt to transliterate Tibetan phonetically using hand-written replacement rules instead of a model.

In [5]:
import pyewts
import re
from transformers import pipeline

## Sample Text

Source: https://www.lotsawahouse.org/tibetan-masters/fifteenth-karmapa/resplendence-guru-yoga

རང་གི་སྤྱི་བོར་ཆུ་སྐྱེས་གེ་སར་ལྟེར། །
ཐབས་ཤེས་ཉི་ཟླ་མཛེས་པའི་ཁྲི་སྟེང་ནས། །
དུས་གསུམ་རྒྱལ་ཀུན་ཡེ་ཤེས་འདུས་པའི་གཟུགས། །
དཀོན་མཆོག་ཀུན་འདུས་བླ་མ་ཀརྨ་པ། །
ཞི་འཛུམ་བདེ་ཆེན་རྒྱས་པའི་ཉམས་བརྒྱ་འབར། །
ཕྱག་གཉིས་བདེ་སྟོང་རྡོར་དྲིལ་ཐུགས་ཀར་བསྣོལ། །
ཞབས་ཟུང་རྡོ་རྗེའི་སྐྱིལ་མོ་ཀྲུང་གིས་བཞུགས། །
དབུ་ལ་ཞྭ་ནག་སྐུ་ལ་ཆོས་གོས་གསོལ། །

rang gi chiwor chukyé gesar ter
tabshé nyida dzepé tri teng né
dü sum gyal kün yeshe düpé zuk
könchok kündü lama karma pa
zhi dzum dechen gyepé nyam gya bar
chak nyi detong dordril tukkar nol
zhabzung dorjé kyilmo trung gi zhuk
u la zha nak ku la chögö sol

At the crown of my head, in the heart of a blossoming lotus,
On a throne adorned with sun and moon discs of method and wisdom,
Sits the embodiment of the wisdom of all the Victors of the three times,
Embodiment of the Three Jewels, Guru Karmapa,
Blazing with the intensity of great bliss, wearing a smile,
With two hands holding a vajra and bell of bliss and emptiness crossed at the heart,
Sitting with legs crossed in vajra posture,
Wearing a black crown, clad in Dharma robes.

In [38]:
# Load the billingsmoore/mlotsawa translation pipeline that the later cells use
# to check how well each transliteration stage is understood.
# NOTE: per the transformers docs, device=0 selects CUDA device 0, so this cell
# expects a GPU to be available.
translator = pipeline(
    task='translation',
    model='billingsmoore/mlotsawa',
    device=0,
    max_length=100,
)



In [12]:
# Sample verse in Tibetan script, held as one multi-line string with one verse
# line per row. The newline right after the opening quotes and the one before
# the closing quotes are part of the value.
tibetan = """
རང་གི་སྤྱི་བོར་ཆུ་སྐྱེས་གེ་སར་ལྟེར། །
ཐབས་ཤེས་ཉི་ཟླ་མཛེས་པའི་ཁྲི་སྟེང་ནས། །
དུས་གསུམ་རྒྱལ་ཀུན་ཡེ་ཤེས་འདུས་པའི་གཟུགས། །
དཀོན་མཆོག་ཀུན་འདུས་བླ་མ་ཀརྨ་པ། །
ཞི་འཛུམ་བདེ་ཆེན་རྒྱས་པའི་ཉམས་བརྒྱ་འབར། །
ཕྱག་གཉིས་བདེ་སྟོང་རྡོར་དྲིལ་ཐུགས་ཀར་བསྣོལ། །
ཞབས་ཟུང་རྡོ་རྗེའི་སྐྱིལ་མོ་ཀྲུང་གིས་བཞུགས། །
དབུ་ལ་ཞྭ་ནག་སྐུ་ལ་ཆོས་གོས་གསོལ། །
"""

## Check Whether a Text Contains Tibetan Script

In [9]:
def contains_tibetan(text):
    """Return True if any character of ``text`` is in the Tibetan Unicode block.

    The block is taken as code points U+0F00 through U+0FFF inclusive. The scan
    stops at the first matching character; an empty ``text`` yields False.
    """
    first_cp, last_cp = 0x0F00, 0x0FFF
    return any(first_cp <= ord(ch) <= last_cp for ch in text)

# Example usage
# NOTE(review): `wylie` is only assigned in the later "Using the PyEWTS library"
# cell, so on a fresh kernel (Restart & Run All) this line raises NameError.
# `wylie` holds the Latin-script transliteration, which is why the recorded
# output for this cell is False.
result = contains_tibetan(wylie[0])
print(result)  # Output: True if Tibetan characters are present


False


## Using the PyEWTS library

In [15]:
# Convert the Tibetan sample to Wylie with pyewts.
# NOTE: `wylie` is one multi-line string here (see the output of the next
# cell), not a list of lines.
converter = pyewts.pyewts()
wylie = converter.toWylie(tibetan)

In [16]:
# Inspect the converted text; each "། །" line ending of the sample shows up as "/_/".
wylie

"\nrang gi spyi bor chu skyes ge sar lter/_/\nthabs shes nyi zla mdzes pa'i khri steng nas/_/\ndus gsum rgyal kun ye shes 'dus pa'i gzugs/_/\ndkon mchog kun 'dus bla ma karma pa/_/\nzhi 'dzum bde chen rgyas pa'i nyams brgya 'bar/_/\nphyag gnyis bde stong rdor dril thugs kar bsnol/_/\nzhabs zung rdo rje'i skyil mo krung gis bzhugs/_/\ndbu la zhwa nag sku la chos gos gsol/_/\n"

In [58]:
# Translate the Wylie text one verse line at a time.
# `wylie` is a single multi-line string, so it is split into lines first;
# passing it whole would hand the model one long input instead of producing
# the per-line results recorded below. A list of lines is accepted as-is.
wylie_lines = wylie.splitlines() if isinstance(wylie, str) else list(wylie)
wylie_translation = [
    elt['translation_text']
    for elt in translator([line for line in wylie_lines if line])
]
wylie_translation

['according to the nyingma school',
 'the sun shines twice a day.',
 'an extensive commentary on the three',
 'in the english language',
 'chen snying thig rtsa pod shechen publications',
 'verses of happiness and wellbeing upon entering the city of vail',
 'the team played well in the contest.',
 'nyala pema dddul recognized ngawang tendzin as a future disciple']

## Converting Wylie To Phonetic

In [59]:
# Wylie -> THL phonetic replacement table.
# Each row is a pair [sources, target]: `sources` is a list of Wylie strings and
# `target` is the string every one of them should be replaced with for THL.
# Both the typographic (’) and the ASCII (') apostrophe spellings are listed for
# most prefixed forms.
# NOTE: wylie_to_phonetic applies the rows top to bottom with plain str.replace,
# so an earlier row can consume text that a later row was meant to match
# (e.g. 'db' is listed in the first row and again in the 'w' row).

replacements = [[['lth', 'rh', 'db'] , ' '],
    [['rb', 'sb', 'sbr', 'lb', '’b', '\'b'] , 'b'],
    [['c', 'cw', 'gc', 'bc', 'lc', 'py', 'lpy', 'spy', 'dpy', 'mch', '’ch', '\'ch', 'phy', '’phy', '\'phy'] , 'ch'],
    [['rd', 'sd', 'gd', 'bd', 'brd', 'bsd', 'zl', 'bzl', 'ld', 'md', '’d', '\'d', 'dw'] , 'd'],
    [['rgr', 'lgr', 'sgr', 'dgr', 'dbr', 'bsgr', 'rbr', 'lbr', 'sbr', 'mgr', '’gr', '\'gr', '’dr', '\'dr', '’br', '\'br', 'gr', 'br', 'grw'] , 'dr'],
    [['rdz', 'gdz', 'brdz', 'mdz', '’dz', '\'dz'] , 'dz'],
    [['rg', 'lg', 'sg', 'dg', 'bg', 'brg', 'bsg', 'lg', 'mg', '’g', '\'g', 'gw'] , 'g'],
    [['rgy', 'lgy', 'sgy', 'dgy', 'bgy', 'brgy', 'bsgy', 'mgy', '’gy', '\'gy'] , 'gy'],
    [['hw'] , 'h'],
    [['rby', 'lby', 'sby', 'rj', 'gj', 'brj', 'lj', 'mj', '’j', '\'j', '’by', '\'by', 'by'] , 'j'],
    [['rk', 'lk', 'sk', 'kw', 'dk', 'bk', 'brk', 'bsk'] , 'k'],
    [['khw', 'mkh', '’kh', '\'kh'] , 'kh'],
    [['mkhy', '’khy', '\'khy'] , 'khy'],
    [['rky', 'lky', 'sky', 'dky', 'bky', 'brky', 'bsky'] , 'ky'],
    [['kl', 'gl', 'bl', 'rl', 'sl', 'brl', 'bsl', 'lw'] , 'l'],
    [['rm', 'sm', 'dm', 'smr', 'mr'] , 'm'],
    [['rn', 'sn', 'gn', 'brn', 'bsn', 'mn'] , 'n'],
    [['rng', 'lng', 'sng', 'dng', 'brng', 'bsng', 'mng'] , 'ng'],
    [['rny', 'sny', 'gny', 'brny', 'bsny', 'mny', 'nyw', 'rmy', 'smy', 'my'] , 'ny'],
    [['sp', 'dp', 'lp', 'ph', '’ph', '\'ph'] , 'p'],
    [['rw'] , 'r'],
    [['sr', 'sw', 'gs', 'bs', 'bsr'] , 's'],
    [['shw', 'gsh', 'bsh'] , 'sh'],
    # The comma between '’th' and '\'th' was missing, which made Python join
    # them into one bogus entry.
    [['rt', 'lt', 'st', 'tw', 'gt', 'bt', 'brt', 'blt', 'bst', 'bld', 'th', 'mth', '’th', '\'th'] , 't'],
    [['kr', 'rkr', 'lkr', 'skr', 'pr', 'lpr', 'spr', 'dkr', 'dpr', 'bkr', 'bskr', 'bsr', 'khr', 'thr', 'phr', 'mkhr', '’khr', '’phr'] , 'tr'],
    [['rts', 'sts', 'rtsw', 'stsw', 'gts', 'bts', 'brts', 'bsts', 'tsh', 'tshw', 'mtsh', '’tsh', '\'tsh'] , 'ts'],
    [['db', 'b'] , 'w'],
    [['g.y', 'dby'] , 'y'],
    [['zw', 'gz', 'bz'] , 'z'],
    [['zh', 'zhw', 'gzh', 'bzh'] , 'zh']]

In [60]:
def wylie_to_phonetic(wylie, table=None):
    """Convert Wylie text to a rough phonetic rendering, one line at a time.

    Args:
        wylie: Either a single (possibly multi-line) string or an iterable of
            line strings. A string is split into lines first; iterating it
            directly would process it character by character.
        table: Optional list of ``[sources, target]`` pairs to apply. Defaults
            to the notebook-level ``replacements`` table.

    Returns:
        A list with one converted string per non-empty input line. Every
        character other than ASCII letters and whitespace is stripped after
        the replacements have been applied.
    """
    if table is None:
        table = replacements

    lines = wylie.splitlines() if isinstance(wylie, str) else wylie

    phonetic = []
    for line in lines:
        if line == '':
            continue

        # perform basic replacements, in table order; each replacement sees the
        # output of the ones before it
        result = line
        for sources, target in table:
            for source in sources:
                result = result.replace(source, target)

        # remove non-alphabetical chars
        phonetic.append(re.sub(r'[^a-zA-Z\s]', '', result))
    return phonetic

# Apply the replacement rules to the Wylie text produced by the PyEWTS cell.
phonetic = wylie_to_phonetic(wylie)

In [61]:
# Inspect the result; compare with the reference phonetics in the Sample Text section.
phonetic

['rang gi schi wor chhu kyes ge sar ter',
 'tas shes nyi da dzes pai tri teng nas',
 'dus sum gyal kun ye shes dus pai zus',
 'kon chhog kun dus la ma kama pa',
 'zhi dzum de chhen gyas pai nyams gya war',
 'chag nyis de tong dor dril tus kar wnol',
 'zhas zung do jei kyil mo trung gis wzhus',
 ' u la zha nag ku la chhos gos sol']

In [62]:
# Run the phonetic lines through the translation pipeline and keep only the
# translated text of each result.
phonetic_translation = [
    result['translation_text'] for result in translator(phonetic)
]
phonetic_translation

['through the power of your natural renunciation you spontaneously preclude',
 'the sun and moon sit on a lotus and moon disc',
 'the buddhas of past present and future all know this to be the case',
 'i make offerings to all the buddhas and bodhisattvas',
 'smiling serenely at a hundredthousandfold temple',
 'the two hands hold a bell and a white vajra',
 'i was born into a family of four.',
 'he wears a hat on his head and is adorned with the dharma robes']