###  _uroman_ package examples
by Ulf Hermjakob, June 28, 2024 (uroman version 1.3.1.1)

In [None]:
# pip install uroman

import uroman as ur

# Load uroman data once at the beginning; the resulting `uroman` instance
# is reused by all of the example cells below.
uroman = ur.Uroman()

In [None]:
# Romanize sample strings written in several different scripts.
# Each output line shows the original string followed by its romanization.

samples = ('Νεπάλ', 'नेपाल', 'نیپال', '三万一')
for s in samples:
    romanization = uroman.romanize_string(s)
    print(s, romanization)

In [None]:
# Romanize the same string under different language codes.
# The last entry, None, passes no language code at all.

s = 'Игорь'
lang_codes = ('rus', 'ukr', None)
for lang_code in lang_codes:
    romanization = uroman.romanize_string(s, lcode=lang_code)
    print(s, lang_code, romanization)

In [None]:
# Romanize one string into several output formats (plain string and JSONL files)

import sys

s = 'ایران'
lcode = 'fas'

# Write the sample string to a one-line input file and echo it.
input_filename = 'in.txt'
with open(input_filename, 'w') as f:
    print(s, file=f)
print(f"Input string:\n{s}\n")

# Each romanization format is paired with the file its output is written to.
format_output_list = (
    (ur.RomFormat.STR, 'str.txt'),
    (ur.RomFormat.EDGES, 'edges.jsonl'),
    (ur.RomFormat.ALTS, 'alts.jsonl'),
    (ur.RomFormat.LATTICE, 'lattice.jsonl'),
)

for rom_format, output_filename in format_output_list:
    # Romanize the input file into the current format ...
    uroman.romanize_file(
        input_filename=input_filename,
        output_filename=output_filename,
        lcode=lcode,
        rom_format=rom_format,
    )
    # ... then show the content of the file that was just written.
    with open(output_filename) as f:
        romanized = f.read()
    sys.stdout.write(f"Output format: {rom_format}\n{romanized}\n")

In [None]:
# Display a file with text from different scripts and languages
# (the language of a line is marked by line-initial ::lcode <lcode>)

input_filename = 'multi-script.txt'

# Decode explicitly as UTF-8 rather than with the platform's locale encoding:
# without it, text in many scripts is garbled or raises UnicodeDecodeError on
# systems whose default encoding is not UTF-8.
# NOTE(review): assumes the sample file is stored as UTF-8 — confirm.
with open(input_filename, encoding='utf-8') as f:
    print(f.read())

In [None]:
# Romanize the file with text from different scripts and languages, then display the result.
# How many US states can you identify in the romanized Hindi (::lcode hin) sentence below?

output_filename = 'multi-script.uroman.txt'

uroman.romanize_file(
    input_filename=input_filename,
    output_filename=output_filename,
)

# Read the romanized file back and print its full content.
with open(output_filename) as f:
    romanized_text = f.read()
print(romanized_text)