In [13]:
# Uncomment to install the dependency into the kernel's environment:
# %pip install vietnam-number

In [14]:
from vietnam_number import w2n

In [15]:
# Convert a number written out in Vietnamese words into its integer value
# using the third-party `vietnam_number` package.
w2n("Hai mươi hai nghìn bốn trăm linh ba")

22403

In [16]:
from typing import List, Literal, Optional, Tuple, Union

# A single decimal digit. `Literal` is the typing construct for a closed set of
# values; `Union[0, 1, ...]` is not a valid type expression (Union expects
# types) and raises TypeError at definition time on some Python versions.
Digit = Literal[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# One group of three digits, most significant first (hundreds, tens, ones).
Period = Tuple[Digit, Digit, Digit]
# What callers may pass in: a numeric string or a plain integer.
InputNumber = Union[str, int]

class RvnError(Exception):
    """Base class for the number reader's own exception types."""

class InvalidFormatError(RvnError):
    """The input value is neither a ``str`` nor an ``int``."""

class InvalidNumberError(RvnError):
    """The integral or fractional part contains a non-digit character."""

class NotEnoughUnitError(RvnError):
    """The number has more digit periods than the config has unit names."""

class NumberData:
    """Parsed form of a number: its sign, integral periods and fractional digits."""

    def __init__(self, is_negative: bool, integral_part: List[Period], fractional_part: List[Digit]):
        # Plain value holder: each argument is stored unchanged.
        self.is_negative: bool = is_negative                 # True when the input carried the negative sign
        self.integral_part: List[Period] = integral_part     # digit groups, most significant group first
        self.fractional_part: List[Digit] = fractional_part  # digits after the decimal point, in order

class ReadingConfig:
    """Default symbols and Vietnamese vocabulary used to parse and read a number."""

    def __init__(self):
        # --- formatting symbols -------------------------------------------
        self.separator: str = ' '               # joins the words of the final reading
        self.unit: List[str] = ['đơn', 'vị']    # words appended after the number
        self.negative_sign: str = '-'           # input prefix marking a negative number
        self.point_sign: str = '.'              # splits integral and fractional parts
        self.thousand_sign: str = ','           # grouping mark removed before parsing
        self.period_size: int = 3               # digits per period
        self.filled_digit: str = '0'            # digit used for padding and trimming

        # --- base vocabulary ----------------------------------------------
        # Word for each digit, indexed by the digit's value.
        self.digits: List[str] = ['không', 'một', 'hai', 'ba', 'bốn', 'năm', 'sáu', 'bảy', 'tám', 'chín']
        # Unit words per period, indexed from the least significant period.
        self.units: List[List[str]] = [
            [],
            ['nghìn'],
            ['triệu'],
            ['tỉ'],
            ['nghìn', 'tỉ'],
            ['triệu', 'tỉ'],
            ['tỉ', 'tỉ'],
        ]

        # --- structural words ---------------------------------------------
        self.negative_text: str = 'âm'          # spoken before a negative number
        self.point_text: str = 'chấm'           # spoken for the decimal point
        self.odd_text: str = 'lẻ'               # links hundreds to ones when the tens digit is 0
        self.ten_text: str = 'mười'             # tens digit equal to 1
        self.hundred_text: str = 'trăm'         # follows the hundreds digit

        # --- variants used after a tens word ------------------------------
        self.one_tone_text: str = 'mốt'         # ones digit 1 after a tens digit >= 2
        self.four_tone_text: str = 'tư'         # ones digit 4 after a tens digit >= 2
        self.five_tone_text: str = 'lăm'        # ones digit 5 after a tens digit >= 1
        self.ten_tone_text: str = 'mươi'        # tens word after a tens digit >= 2

def trim_left(s: str, char: str) -> str:
    """Remove every leading occurrence of ``char[0]`` from ``s``.

    Only the first character of ``char`` is considered. A string made up
    entirely of that character (for example ``"0"`` or ``"000"``) is trimmed
    down to ``''``.

    Args:
        s: Text to trim.
        char: Character to strip; when empty there is nothing to strip and
            ``s`` is returned unchanged.

    Returns:
        ``s`` without its leading run of ``char[0]``.
    """
    if not char:
        return s
    # str.lstrip stops at the end of the string, so an all-`char` input
    # cannot index past the last position.
    return s.lstrip(char[0])

def trim_right(s: str, char: str) -> str:
    """Remove every trailing occurrence of ``char[0]`` from ``s``.

    Only the first character of ``char`` is considered. A string made up
    entirely of that character (for example ``"000"``) is trimmed down to
    ``''``.

    Args:
        s: Text to trim.
        char: Character to strip; when empty there is nothing to strip and
            ``s`` is returned unchanged.

    Returns:
        ``s`` without its trailing run of ``char[0]``.
    """
    if not char:
        return s
    # str.rstrip stops at the start of the string, so the scan can never
    # wrap around through negative indices.
    return s.rstrip(char[0])

def split_to_digits(s: str) -> Optional[List[Digit]]:
    """Convert a string of decimal digits into a list of integers.

    Args:
        s: Text expected to contain only decimal digit characters.

    Returns:
        One int per character, in order (``[]`` for an empty string), or
        ``None`` when any character is not a decimal digit.
    """
    # str.isdecimal() matches exactly the characters int() can convert.
    # str.isdigit() also accepts characters such as superscripts or circled
    # digits, for which int() raises ValueError.
    if not all(ch.isdecimal() for ch in s):
        return None
    return [int(ch) for ch in s]

def validate_number(value: InputNumber) -> str:
    """Normalise the caller's input to a string.

    Args:
        value: A string (returned as is) or an int (converted with ``str``).
            Note that ``bool`` is an ``int`` subclass and is therefore
            stringified as well.

    Returns:
        The textual form of ``value``.

    Raises:
        InvalidFormatError: If ``value`` is neither a ``str`` nor an ``int``.
    """
    if isinstance(value, str):
        return value
    if not isinstance(value, int):
        raise InvalidFormatError('Invalid format')
    return str(value)

def read_last_two_digits(config: ReadingConfig, b: Digit, c: Digit, read_zero_ten: bool) -> List[str]:
    """Spell out a two-digit group (tens digit ``b``, ones digit ``c``).

    Args:
        config: Vocabulary source (digit words, tens words and variants).
        b: Tens digit.
        c: Ones digit.
        read_zero_ten: When the tens digit is 0 and the ones digit is not,
            also emit the word for zero in front of the ones digit.

    Returns:
        The words for the group, in reading order.
    """
    if b == 0:
        # No tens: optionally voice the zero placeholder, then the ones digit.
        leading = [config.digits[b]] if (read_zero_ten and c != 0) else []
        return leading + [config.digits[c]]

    # Pick the tens wording and the ones-digit variants that follow it.
    if b == 1:
        words = [config.ten_text]
        variants = {5: config.five_tone_text}
    else:
        words = [config.digits[b], config.ten_tone_text]
        variants = {
            1: config.one_tone_text,
            4: config.four_tone_text,
            5: config.five_tone_text,
        }

    if c in variants:
        words.append(variants[c])
    elif c != 0:
        # A zero ones digit is silent after a tens word.
        words.append(config.digits[c])
    return words

def read_three_digits(config: ReadingConfig, a: Digit, b: Digit, c: Digit, read_zero_hundred: bool) -> List[str]:
    """Spell out one three-digit period (hundreds ``a``, tens ``b``, ones ``c``).

    Args:
        config: Vocabulary source.
        a: Hundreds digit.
        b: Tens digit.
        c: Ones digit.
        read_zero_hundred: Voice the hundreds position even when ``a`` is 0.

    Returns:
        The words for the period, in reading order.
    """
    words = []
    if a != 0 or read_zero_hundred:
        words += [config.digits[a], config.hundred_text]
        if b == 0:
            if c == 0:
                # A round hundred: nothing follows the hundreds word.
                return words
            # Bridge word between the hundreds and a lone ones digit.
            words.append(config.odd_text)
    words += read_last_two_digits(config, b, c, False)
    return words

def remove_thousands_separators(config: ReadingConfig, number: str) -> str:
    """Return ``number`` with every ``config.thousand_sign`` occurrence removed."""
    grouping_mark = config.thousand_sign
    cleaned = number.replace(grouping_mark, '')
    return cleaned

def trim_redundant_zeros(config: ReadingConfig, number: str) -> str:
    """Strip padding digits that do not change the number's value.

    Leading ``config.filled_digit`` characters are always removed. Trailing
    ones are removed only when ``number`` contains ``config.point_sign``.
    """
    zero = config.filled_digit
    has_point = config.point_sign in number
    # Without a decimal point the trailing zeros are significant.
    candidate = trim_right(number, zero) if has_point else number
    return trim_left(candidate, zero)

def add_leading_zeros_to_fit_period(config: ReadingConfig, number: str) -> str:
    """Left-pad ``number`` so its length is a multiple of ``config.period_size``.

    The padding character is ``config.filled_digit``. A string whose length
    already fits (including the empty string) is returned unchanged.
    """
    size = config.period_size
    # Characters missing to reach the next multiple of the period size.
    missing = -len(number) % size
    return number.rjust(len(number) + missing, config.filled_digit)

def zip_integral_periods(config: ReadingConfig, digits: List[Digit]) -> List[Period]:
    """Group ``digits`` into consecutive tuples of ``config.period_size`` items.

    Groups are cut from the front of the list. If the last group comes up
    short it is padded with zeros on its right, so callers that need
    value-preserving groups should pad the digits on the left beforehand.

    Returns:
        The groups in input order; an empty list for empty input.
    """
    size = config.period_size
    group_count = (len(digits) + size - 1) // size  # ceiling division
    chunks = (digits[k * size:(k + 1) * size] for k in range(group_count))
    return [tuple(chunk + [0] * (size - len(chunk))) for chunk in chunks]

def parse_number_data(config: ReadingConfig, number: str) -> NumberData:
    """Parse a numeric string into sign, integral periods and fractional digits.

    Steps: remove thousands separators, detect and strip the negative sign,
    trim redundant zeros, split at the first decimal point, left-pad the
    integral digits to whole periods, then convert both parts to digits.

    Args:
        config: Symbols (signs, period size, padding digit) and unit table.
        number: The number as text.

    Returns:
        A ``NumberData`` describing the number. An empty integral part is
        represented by a single all-zero period.

    Raises:
        InvalidNumberError: If the integral or fractional part contains a
            character that is not a digit.
        NotEnoughUnitError: If there are more periods than ``config.units``
            entries.
    """
    number_string = remove_thousands_separators(config, number)

    is_negative = number_string.startswith(config.negative_sign)
    if is_negative:
        number_string = number_string[len(config.negative_sign):]
    number_string = trim_redundant_zeros(config, number_string)

    point_pos = number_string.find(config.point_sign)
    if point_pos == -1:
        integral_string, fractional_string = number_string, ''
    else:
        integral_string = number_string[:point_pos]
        # Skip the whole separator (not a fixed single character), the same
        # way the negative sign above is stripped by its length.
        fractional_string = number_string[point_pos + len(config.point_sign):]
    integral_string = add_leading_zeros_to_fit_period(config, integral_string)

    integral_digits = split_to_digits(integral_string)
    fractional_digits = split_to_digits(fractional_string)
    if integral_digits is None:
        raise InvalidNumberError('Invalid integral part')
    if fractional_digits is None:
        raise InvalidNumberError('Invalid fractional part')

    integral_part = zip_integral_periods(config, integral_digits)
    if not integral_part:
        # Nothing left before the point (e.g. "0" or ".5"): read it as zero.
        integral_part.append((0, 0, 0))
    elif len(integral_part) > len(config.units):
        raise NotEnoughUnitError('Unit not enough')

    return NumberData(is_negative, integral_part, fractional_digits)

def read_integral_part(config: ReadingConfig, periods: List[Period]) -> List[str]:
    """Spell out the integral part, one period at a time.

    All-zero periods are skipped unless the number consists of a single
    period. Every period after the first is read with its hundreds position
    voiced, and each period read is followed by its unit words from
    ``config.units``.

    Args:
        config: Vocabulary and unit table.
        periods: Digit groups, most significant first.

    Returns:
        The words of the integral part, in reading order.
    """
    words = []
    last_index = len(periods) - 1
    for position, period in enumerate(periods):
        a, b, c = period
        is_blank = a == 0 and b == 0 and c == 0
        if is_blank and last_index != 0:
            # A silent period inside a multi-period number.
            continue
        words.extend(
            read_three_digits(config, a, b, c, position != 0) +
            config.units[last_index - position]
        )
    return words

def read_fractional_part(config: ReadingConfig, digits: List[Digit]) -> List[str]:
    """Spell out the digits after the decimal point.

    Exactly two digits are read as a two-digit group and exactly three as a
    three-digit period, both with the zero positions voiced. Any other
    length is read digit by digit.

    Returns:
        The words of the fractional part, in reading order.
    """
    count = len(digits)
    if count == 2:
        tens, ones = digits
        return list(read_last_two_digits(config, tens, ones, True))
    if count == 3:
        hundreds, tens, ones = digits
        return list(read_three_digits(config, hundreds, tens, ones, True))
    return [config.digits[value] for value in digits]

def read_number(config: ReadingConfig, number_data: NumberData) -> str:
    """Render parsed number data as a spoken phrase.

    The phrase is built as: optional negative word, integral part, optional
    point word plus fractional part, then the unit words from ``config.unit``.
    All words are joined with ``config.separator``.
    """
    words = list(read_integral_part(config, number_data.integral_part))

    fraction = number_data.fractional_part
    if fraction:
        words.append(config.point_text)
        words.extend(read_fractional_part(config, fraction))

    if number_data.is_negative:
        words = [config.negative_text] + words

    return config.separator.join(words + list(config.unit))

def do_read_number(config: ReadingConfig, number: InputNumber) -> str:
    """Read ``number`` aloud: validate it, parse it, then spell it out.

    Args:
        config: Symbols and vocabulary to use.
        number: The number as a string or an int.

    Returns:
        The reading produced by ``read_number``.

    Raises:
        InvalidFormatError: From ``validate_number`` for unsupported input types.
        InvalidNumberError: From ``parse_number_data`` for non-digit characters.
        NotEnoughUnitError: From ``parse_number_data`` when the number is too long.
    """
    return read_number(config, parse_number_data(config, validate_number(number)))


In [22]:
# Example: read a decimal number with the default configuration.
do_read_number(ReadingConfig(), "2003.0873")

'hai nghìn không trăm lẻ ba chấm không tám bảy ba đơn vị'