# Sorani Kurdish data using Pandas plot: Plotly backend



## Setup

In [1]:
import pandas as pd
import locale, platform
import unicodedata as ud, regex as re
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import plotly.express as px

# Set plotly as backend
pd.set_option("plotting.backend", "plotly")

## Helper functions

In [2]:
def convert_digits(s, sep = (",", ".")):
    nd = re.compile(r'^-?\p{Nd}[,.\u066B\u066C\u0020\u2009\u202F\p{Nd}]*$')
    tsep, dsep = sep
    if nd.match(s):
        s = s.replace(tsep, "")
        s = ''.join([str(ud.decimal(c, c)) for c in s])
        if dsep in s:
            return float(s.replace(dsep, ".")) if dsep != "." else float(s)
        return int(s)
    return s

seps = ("\u066C", "\u066B")
digitsconv = lambda x: convert_digits(x.replace("-", "٠"), sep = seps)

## Read data

In [3]:
import pandas as pd
conv = {
    'سووریا': digitsconv,
    'عێراق': digitsconv,
    'ئێران': digitsconv,
    'تورکیا': digitsconv,
    'جیھانی': digitsconv
}
df = pd.read_table("../data/demographics.tsv", converters=conv)
df

Unnamed: 0,---,جیھانی,تورکیا,ئێران,عێراق,سووریا
0,کرمانجی,14419000,7919000,443000,3185000,1661000
1,ئەوانەی بە تورکی دەدوێن,5732000,5732000,0,0,0
2,باشوور,3381000,0,3381000,0,0
3,سۆرانی,1576000,0,502000,567000,0
4,زازایی - دەملی,1125000,1125000,0,0,0
5,زازایی - ئەلڤێکا,184000,179000,0,0,0
6,ڕەوەند,90000,38000,20000,33000,0
7,ھەورامی,54000,0,26000,28000,0
8,شکاکی,49000,23000,26000,0,0
9,کۆی گشتی,26712000,15016000,4398000,3916000,1661000


In [4]:
col_list=["تورکیا" ,"ئێران" ,"عێراق" ,"سووریا"]

total_df = df[col_list].sum(axis=0)
print(total_df)

تورکیا    30032000
ئێران      8796000
عێراق      7729000
سووریا     3322000
dtype: int64


## Generate plot

The function `convert_to_arab_ns()` converts an integer or floating point number to its equivalent Eastern Arabic numeric form.

In [5]:
def convert_to_arab_ns(n, p=None, decimal=2, sep_in=["", "."], sep_out=["\u066C", "\u066B"], scale=None):
    locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
    decimal_places = decimal
    if sep_in == ["", "."]:
        n = n * scale if scale else n
        format_string = '%0.' + str(decimal_places) + 'f' if type(n) == float else '%d'
        n = locale.format_string(format_string, n, grouping=True, monetary=True)
        n = n.replace(",", "ṯ").replace(".", "ḏ")
        #n = str(n)
    if sep_in[0] in [" ", ",", "٬", "\u2009"]:
        n = n.replace(r'[\u0020,٬\u2009]', "ṯ")
    elif sep_in[0] == ".":
        n = n.replace(".", "ṯ")
    if sep_in[1] in [",", ".", "٫"]:
        n = n.replace(r'[,.٫]', "ḏ")
    sep = sep_out
    t = n.maketrans("0123456789", "٠١٢٣٤٥٦٧٨٩")
    locale.setlocale(locale.LC_ALL, "")
    return n.translate(t).replace("ṯ", sep[0] ).replace("ḏ", sep[1])

Generate the figure which will then be updated as required:

In [6]:
fig = total_df.plot(kind="bar", title='ڕێژەی دانیشتووانی کورد')

The function `fig.full_figure_for_development()` is used to identify the y-axis tick labels used in the plot. These are not directly accessible via the standard `fig.layout.yaxis` object. `fig.full_figure_for_development()` was intended for use during development, rather than production, but it provides the only way of determining the `dtick` values calculated by the plot. The standard `fig.layout.yaxis.dtick` would return the explicit value set by the developer, rather than the generated value. If no value was set, it returns `None`.

Alternatively, a developer could explicitly set a `range` and `dtick`, avoiding the need for `fig.full_figure_for_development()`.

The code then calculates the tick values, and converts those values into Eastern Arabic numerals, which are then set as the textual values to assign as tick labels.

In [7]:
%%capture
full_fig = fig.full_figure_for_development()

yaxis_range_min = int(full_fig.layout.yaxis.range[0])
yaxis_range_max = int(full_fig.layout.yaxis.range[1])
yaxis_dtick = int(full_fig.layout.yaxis.dtick)

yaxis_tickvals = [item for item in range(yaxis_range_min, yaxis_range_max, yaxis_dtick)]
yaxis_ticktext = [convert_to_arab_ns(item, decimal=0, scale=0.000001) for item in yaxis_tickvals]

In [9]:
fig.update_layout(
    title={
        'text': 'ڕێژەی دانیشتووانی کورد',
        'y':0.90,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    xaxis={
        "title":"ناوچە",
        "autorange": 'reversed'},
    yaxis={
        "title": "ڕێژەی دانیشتووان (بە ملیۆن)",
        "side": "right",
        "tickmode": "array",
        "tickvals": yaxis_tickvals,
        "ticktext": yaxis_ticktext},
    font=dict(
        family="Vazirmatn",
        size=14,
        color="Grey"
    ),
    showlegend=False
)

fig.show()