# Working with the en_IN locale: numbers

## Setup

In [63]:
# List the locales currently installed on the host (shell escape), so we can
# see whether an en_IN variant is available before trying to select it.
!locale -a

C
C.utf8
de_AT.utf8
de_BE.utf8
de_CH.utf8
de_DE.utf8
de_IT.utf8
de_LI.utf8
de_LU.utf8
en_AG
en_AG.utf8
en_AU.utf8
en_BW.utf8
en_CA.utf8
en_DK.utf8
en_GB.utf8
en_HK.utf8
en_IE.utf8
en_IL
en_IL.utf8
en_IN
en_IN.utf8
en_NG
en_NG.utf8
en_NZ.utf8
en_PH.utf8
en_SG.utf8
en_US.utf8
en_ZA.utf8
en_ZM
en_ZM.utf8
en_ZW.utf8
fr_BE.utf8
fr_CA.utf8
fr_CH.utf8
fr_FR.utf8
fr_LU.utf8
POSIX
sv_FI.utf8
sv_SE.utf8
vi_VN
vi_VN.utf8


In [64]:
import locale
loc = "en_IN.UTF-8"

try:
    from icu import Locale, NumberFormatter, CurrencyUnit, Precision, Notation
except:
    !pip install pyicu
    from icu import Locale, NumberFormatter, CurrencyUnit, Precision, Notation

try:
    locale.setlocale(locale.LC_ALL, loc)
except locale.Error:
    print("After the missing locale is installed, the runtime environment will restart, please rerun all code.")
    !sudo locale-gen {loc}
    !sudo update-locale
    import os
    os.kill(os.getpid(), 9)

## Introduction

We will look at how to handle locale-sensitive number formatting using the [locale](https://docs.python.org/3/library/locale.html) and _PyICU_ modules, using the `en_IN` locale as an example.

Numeric operations available in the _locale_ module:

    locale.format_string(format, val, grouping=False, monetary=False)
    locale.format(format, val, grouping=False, monetary=False)  # deprecated since Python 3.7, removed in Python 3.12; use locale.format_string()
    locale.currency(val, symbol=True, grouping=False, international=False)
    locale.str(float)
    locale.delocalize(string)
    locale.localize(string, grouping=False, monetary=False)
    locale.atof(string, func=float)
    locale.atoi(string)
    
PyICU provides a wrapper around [icu::NumberFormatter](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/numberformatter_8h.html#details) (icu4c).

`NumberFormatter` supports:

* Decimal Formatting
* Currencies
* Measurement Units
* Percentages
* Scientific Notation
* Compact Notation
    


## Currency

### Using the locale module

`locale.currency(value, symbol=True, grouping=False, international=False)`

__symbol__: Boolean parameter that includes or excludes the current locale's currency symbol from the string. \
__grouping__: Applies or suppresses the use of grouping separators in the resulting string. The grouping pattern and grouping separator are locale sensitive. \
__international__: Controls whether the international currency symbol is used instead of the locale's currency symbol.

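For the `en_IN.UTF-8` locale, the _Indian rupee sign_ (₹) currency symbol is used. The comma is used as the grouping separator, a period is used as the decimal separator, and a (2, 3) grouping pattern is used for the digits: the rightmost three digits form one group and the remaining digits are grouped in pairs (for example, 1,43,67,843.00).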

In [65]:
# Small amounts: no value is large enough to need a grouping separator, but
# every value is rendered as a currency string for the active locale.
lst = [2.54, 4.0, 3, 9.95, 5.4]
print([locale.currency(amount, grouping=True) for amount in lst])

['₹ 2.54', '₹ 4.00', '₹ 3.00', '₹ 9.95', '₹ 5.40']


In [66]:
# Larger amounts, so the locale's digit grouping shows up in the output.
lst2 = [123452.54, 14367843.0, 120030, 900020.95, 5000000.0]
print([locale.currency(amount, grouping=True) for amount in lst2])


['₹ 1,23,452.54', '₹ 1,43,67,843.00', '₹ 1,20,030.00', '₹ 9,00,020.95', '₹ 50,00,000.00']


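Depending on the `en_IN.UTF-8` locale definition on the OS being used, the currency symbol may or may not be separated from the value by a space. The separation can be forced by overriding the `p_sep_by_space` locale convention: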

In [67]:
# Force a space between the currency symbol and a positive value.
# NOTE(review): `_override_localeconv` is an underscore-prefixed (private)
# attribute of the locale module, not a documented API, and the override stays
# in effect for the rest of the session.
locale._override_localeconv.update({"p_sep_by_space": 1})
print([locale.currency(amount, grouping=True) for amount in lst2])


['₹ 1,23,452.54', '₹ 1,43,67,843.00', '₹ 1,20,030.00', '₹ 9,00,020.95', '₹ 50,00,000.00']


### Using PyICU


In [68]:
# Build an ICU locale from the POSIX-style identifier, then a currency
# formatter for Indian rupees bound to that locale.
icu_loc = Locale.createCanonical(loc)
formatter = (
    NumberFormatter.with_()
    .unit(CurrencyUnit('INR'))
    .locale(icu_loc)
)
# NOTE(review): an earlier draft post-processed each result with
# .replace('\xa0', ' '), presumably to normalise no-break spaces in the
# formatted strings; none appear in the output for this locale.
print([str(formatter.formatDouble(amount)) for amount in lst2])

['₹1,23,452.54', '₹1,43,67,843.00', '₹1,20,030.00', '₹9,00,020.95', '₹50,00,000.00']


In [69]:
# Same formatter, built by starting from the locale and then adding the unit.
formatter2 = (
    NumberFormatter.withLocale(icu_loc)
    .unit(CurrencyUnit('INR'))
)
print([str(formatter2.formatDouble(amount)) for amount in lst2])

['₹1,23,452.54', '₹1,43,67,843.00', '₹1,20,030.00', '₹9,00,020.95', '₹50,00,000.00']


In [84]:
# Currency formatter with the fraction pinned to exactly two digits
# (minimum 2, maximum 2).
formatter3 = (
    NumberFormatter.with_()
    .unit(CurrencyUnit('INR'))
    .locale(icu_loc)
    .precision(Precision.minMaxFraction(2,2))
)
print([formatter3.formatDouble(amount) for amount in lst2])

['₹1,23,452.54', '₹1,43,67,843.00', '₹1,20,030.00', '₹9,00,020.95', '₹50,00,000.00']


In [85]:
# Two-fraction-digit currency formatter using compact-short notation, which
# for this locale abbreviates with L and Cr (see the output below).
formatter4 = (
    NumberFormatter.with_()
    .unit(CurrencyUnit('INR'))
    .locale(icu_loc)
    .precision(Precision.minMaxFraction(2,2))
    .notation(Notation.compactShort())
)
print([formatter4.formatDouble(amount) for amount in lst2])

['₹1.23L', '₹1.44Cr', '₹1.20L', '₹9.00L', '₹50.00L']


In [90]:
# Same configuration as the previous formatter; the fluent chain is wrapped in
# parentheses, one setting per line, instead of backslash continuations.
formatter5 = (
    NumberFormatter.with_()
    .unit(CurrencyUnit('INR'))
    .locale(icu_loc)
    .precision(Precision.minMaxFraction(2,2))
    .notation(Notation.compactShort())
)
print([formatter5.formatDouble(amount) for amount in lst2])

['₹1.23L', '₹1.44Cr', '₹1.20L', '₹9.00L', '₹50.00L']


---

In [88]:
# Inspect the attributes of the configured formatter object. `pdir` is a
# third-party helper (NOTE(review): presumably the `pdir2` package — confirm)
# that prints an object's attributes grouped by category.
import pdir
pdir(formatter3)

[0;33mspecial attribute:[0m
    [0;36m__class__[0m[1;30m, [0m[0;36m__doc__[0m
[0;33mabstract class:[0m
    [0;36m__subclasshook__[0m
[0;33mobject customization:[0m
    [0;36m__format__[0m[1;30m, [0m[0;36m__hash__[0m[1;30m, [0m[0;36m__init__[0m[1;30m, [0m[0;36m__new__[0m[1;30m, [0m[0;36m__repr__[0m[1;30m, [0m[0;36m__sizeof__[0m[1;30m, [0m[0;36m__str__[0m
[0;33mrich comparison:[0m
    [0;36m__eq__[0m[1;30m, [0m[0;36m__ge__[0m[1;30m, [0m[0;36m__gt__[0m[1;30m, [0m[0;36m__le__[0m[1;30m, [0m[0;36m__lt__[0m[1;30m, [0m[0;36m__ne__[0m
[0;33mattribute access:[0m
    [0;36m__delattr__[0m[1;30m, [0m[0;36m__dir__[0m[1;30m, [0m[0;36m__getattribute__[0m[1;30m, [0m[0;36m__setattr__[0m
[0;33mclass customization:[0m
    [0;36m__init_subclass__[0m
[0;33mpickle:[0m
    [0;36m__reduce__[0m[1;30m, [0m[0;36m__reduce_ex__[0m
[0;33mdescriptor:[0m
    [0;36mowned[0m[0;36m: [0m[1;30mclass getset_descriptor with get

In [73]:
# Inspect the attributes exposed by PyICU's Precision class.
pdir(Precision)

[0;33mspecial attribute:[0m
    [0;36m__class__[0m[1;30m, [0m[0;36m__doc__[0m
[0;33mabstract class:[0m
    [0;36m__subclasshook__[0m
[0;33mobject customization:[0m
    [0;36m__format__[0m[1;30m, [0m[0;36m__hash__[0m[1;30m, [0m[0;36m__init__[0m[1;30m, [0m[0;36m__new__[0m[1;30m, [0m[0;36m__repr__[0m[1;30m, [0m[0;36m__sizeof__[0m[1;30m, [0m[0;36m__str__[0m
[0;33mrich comparison:[0m
    [0;36m__eq__[0m[1;30m, [0m[0;36m__ge__[0m[1;30m, [0m[0;36m__gt__[0m[1;30m, [0m[0;36m__le__[0m[1;30m, [0m[0;36m__lt__[0m[1;30m, [0m[0;36m__ne__[0m
[0;33mattribute access:[0m
    [0;36m__delattr__[0m[1;30m, [0m[0;36m__dir__[0m[1;30m, [0m[0;36m__getattribute__[0m[1;30m, [0m[0;36m__setattr__[0m
[0;33mclass customization:[0m
    [0;36m__init_subclass__[0m
[0;33mpickle:[0m
    [0;36m__reduce__[0m[1;30m, [0m[0;36m__reduce_ex__[0m
[0;33mdescriptor:[0m
    [0;36mcurrency[0m[0;36m: [0m[1;30mclass classmethod_descriptor 

In [76]:
# Inspect the attributes exposed by PyICU's CurrencyUnit class.
pdir(CurrencyUnit)

[0;33mspecial attribute:[0m
    [0;36m__class__[0m[1;30m, [0m[0;36m__doc__[0m
[0;33mabstract class:[0m
    [0;36m__subclasshook__[0m
[0;33marithmetic:[0m
    [0;36m__mul__[0m[1;30m, [0m[0;36m__pow__[0m[1;30m, [0m[0;36m__rmul__[0m[1;30m, [0m[0;36m__rpow__[0m[1;30m, [0m[0;36m__rtruediv__[0m[1;30m, [0m[0;36m__truediv__[0m
[0;33mobject customization:[0m
    [0;36m__format__[0m[1;30m, [0m[0;36m__hash__[0m[1;30m, [0m[0;36m__init__[0m[1;30m, [0m[0;36m__new__[0m[1;30m, [0m[0;36m__repr__[0m[1;30m, [0m[0;36m__sizeof__[0m[1;30m, [0m[0;36m__str__[0m
[0;33mrich comparison:[0m
    [0;36m__eq__[0m[1;30m, [0m[0;36m__ge__[0m[1;30m, [0m[0;36m__gt__[0m[1;30m, [0m[0;36m__le__[0m[1;30m, [0m[0;36m__lt__[0m[1;30m, [0m[0;36m__ne__[0m
[0;33mattribute access:[0m
    [0;36m__delattr__[0m[1;30m, [0m[0;36m__dir__[0m[1;30m, [0m[0;36m__getattribute__[0m[1;30m, [0m[0;36m__setattr__[0m
[0;33mclass customization:[0